library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
googlesheets4,
ggrepel
)
# Session-wide defaults: print numbers with 3 significant digits and use the
# black-and-white ggplot2 theme for every figure.
options(digits = 3)
theme_set(theme_bw())
# scimagojr: one Excel export per year, stacked into a single data frame.
# The year is taken from the first run of digits in each file name and is kept
# as character.
# NOTE(review): `[-1]` discards the first matching file (dir() returns names in
# alphabetical order) -- presumably a file that must be excluded; confirm.
scimagojr_files = dir(path = "data", pattern = "^scimag")[-1]
scimagojr_years = str_extract(scimagojr_files, "\\d+")
scimagojr = map2_df(scimagojr_files, scimagojr_years, function(file, file_year) {
  # file.path() instead of `"data/" + file`, which only works because
  # kirkegaard masks base `+`
  read_excel(file.path("data", file)) %>% mutate(year = file_year)
}) %>%
  df_legalize_names()

# ISO
scimagojr$ISO = scimagojr$Country %>% pu_translate()
# pu_translate() has no exact entry for the Dutch part of Saint Martin and
# fuzzy-matches it to the French part, so both territories would share one ISO
# code and therefore one population. Pin the Dutch part to its own ISO 3166-1
# alpha-3 code (Sint Maarten).
scimagojr$ISO[scimagojr$Country %in% "Saint Martin (Dutch)"] = "SXM"
## No exact match: Democratic Republic Congo
## No exact match: Saint Martin (Dutch)
## No exact match: Saint Martin (French)
## Best fuzzy match found: Democratic Republic Congo -> Democratic Republic of Congo with distance 3.00
## Best fuzzy match found: Saint Martin (Dutch) -> Saint Martin (FRA) with distance 5.00
## Best fuzzy match found: Saint Martin (French) -> Saint Martin (France) with distance 2.00
# Population counts from the UN WPP 2022 workbook (default sheet), skipping the
# 16 rows above the header.
# NOTE(review): readxl recycles a length-one `col_types`, so this presumably
# reads *every* column as text (not only the ISO3 column) -- which would be why
# the population column is converted below and `year` ends up as character.
# Confirm against the readxl documentation.
un_pop = "data/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx" %>%
  read_excel(
    skip = 16,
    col_types = c(`ISO3 Alpha-code` = "text")
  ) %>%
  df_legalize_names() %>%
  rename(ISO = ISO3_Alpha_code, year = Year) %>%
  mutate(
    # non-numeric cells become NA here (hence the coercion warning)
    Total_Population_as_of_1_January_thousands = as.numeric(Total_Population_as_of_1_January_thousands)
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Total_Population_as_of_1_January_thousands =
## as.numeric(Total_Population_as_of_1_January_thousands)`.
## Caused by warning:
## ! NAs introduced by coercion
# The 2022 figures are not on the first sheet; they are still on the forecast
# sheet (sheet 2), so read that sheet, keep only 2022, and give it the same
# column names and types as `un_pop` before appending it.
un_pop_2022 = "data/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx" %>%
  read_excel(
    skip = 16,
    sheet = 2,
    col_types = c(`ISO3 Alpha-code` = "text")
  ) %>%
  filter(Year == 2022) %>%
  df_legalize_names() %>%
  rename(ISO = ISO3_Alpha_code, year = Year) %>%
  mutate(
    Total_Population_as_of_1_January_thousands = as.numeric(Total_Population_as_of_1_January_thousands)
  )

# append the 2022 rows to the rows read from the first sheet
un_pop = bind_rows(un_pop, un_pop_2022)
# Per-capita output for each (year x ISO): attach the UN population to every
# scimagojr row, keeping only rows present in both tables.
scimagojr_pc = inner_join(
  scimagojr,
  un_pop,
  by = c("ISO", "year"),
  # dplyr matches NA keys to each other by default; a country whose name could
  # not be translated to an ISO code must not be paired with UN rows that have
  # no ISO code either
  na_matches = "never"
) %>%
  mutate(
    # the population column is in thousands, so /1000 expresses it in millions
    Citable_documents_p1M = Citable_documents /
      (Total_Population_as_of_1_January_thousands / 1000)
  )
#nature index data
#files are inconsistent
# nature_index_files = dir(path = "data/nature_index/")
# nature_index = map2_df(nature_index_files, str_match(nature_index_files, "\\d+"), function(file, .year) {
# # browser()
# x = read_csv("data/nature_index/" + file) %>%
# mutate(year = as.numeric(.year)-1)
#
# colnames(x) = c("rank", "country", "nature_share", "nature_count")
#
# x
# })
# Read the Nature Index data from the second sheet of the Google spreadsheet,
# skipping its first row. gs4_deauth() switches googlesheets4 to
# unauthenticated access before the read.
gs4_deauth()
nature_index = "https://docs.google.com/spreadsheets/d/1-gdYljdpCfPaV1AcwwYaiBCdJLB1THK8pw1_SSXjTIY/edit#gid=2024502531" %>%
  read_sheet(sheet = 2, skip = 1)
## ✔ Reading from "science production per capita".
## ✔ Range ''nature index data'!2:10000000'.
# ISO codes for the Nature Index country names
nature_index$ISO = pu_translate(nature_index$country)
## No exact match: United States of America (USA)
## No exact match: United Kingdom (UK)
## Best fuzzy match found: United States of America (USA) -> United States of America with distance 6.00
## Best fuzzy match found: United Kingdom (UK) -> United Kingdom with distance 5.00

# Attach the UN population to every (year, ISO) row of the Nature Index data.
# `un_pop$year` is converted to numeric for this join only.
nature_index_pc = inner_join(
  nature_index,
  un_pop %>% mutate(year = as.numeric(year)),
  by = c("year", "ISO"),
  # never pair rows through missing keys (dplyr matches NA to NA by default)
  na_matches = "never"
) %>%
  mutate(
    # population is in thousands, so this is the count per million residents
    nature_count_pc = nature_count / Total_Population_as_of_1_January_thousands * 1000
  )
# Country ranking for 2022: citable documents per million residents, highest
# first, together with the raw document count and the population used.
scimagojr_pc_2022 = scimagojr_pc %>%
  filter(year == 2022) %>%
  arrange(desc(Citable_documents_p1M)) %>%
  select(
    Country,
    Citable_documents,
    Citable_documents_p1M,
    Total_Population_as_of_1_January_thousands
  )

# table
write_clipboard(scimagojr_pc_2022)
## Country Citable documents Citable documents p1M
## 1 Vatican City State 8.00 15748.03
## 2 Monaco 257.00 7029.35
## 3 Macao 4839.00 7006.27
## 4 Falkland Islands (Malvinas) 24.00 6364.36
## 5 Switzerland 51311.00 5889.68
## 6 Iceland 2028.00 5456.42
## 7 Denmark 31831.00 5424.53
## 8 Norway 27613.00 5100.00
## 9 Sweden 45467.00 4322.92
## 10 Luxembourg 2770.00 4301.76
## 11 Australia 111601.00 4284.72
## 12 Finland 23512.00 4245.37
## 13 Singapore 25253.00 4240.00
## 14 Hong Kong 30785.00 4110.61
## 15 Liechtenstein 154.00 3928.97
## 16 Netherlands 65872.00 3756.27
## 17 Ireland 18365.00 3668.68
## 18 Cyprus 4505.00 3612.44
## 19 Slovenia 7431.00 3505.54
## 20 New Zealand 17643.00 3416.75
## 21 Austria 29320.00 3283.42
## 22 Portugal 32903.00 3199.99
## 23 Belgium 36524.00 3137.62
## 24 Estonia 4092.00 3082.25
## 25 Canada 117417.00 3066.45
## 26 United Kingdom 205867.00 3054.66
## 27 Saint Kitts and Nevis 140.00 2940.62
## 28 Faroe Islands 147.00 2773.85
## 29 Israel 24673.00 2750.39
## 30 Greenland 134.00 2377.28
## 31 Czech Republic 24719.00 2354.75
## 32 Brunei Darussalam 1051.00 2350.24
## 33 Malta 1247.00 2342.33
## 34 Spain 110360.00 2319.65
## 35 Italy 136051.00 2301.29
## 36 Greece 23825.00 2288.75
## 37 Croatia 9104.00 2252.81
## 38 Qatar 5894.00 2195.71
## 39 Germany 183077.00 2194.46
## 40 Gibraltar 68.00 2084.29
## 41 Grenada 250.00 1999.15
## 42 South Korea 98876.00 1907.73
## 43 Taiwan 45342.00 1898.92
## 44 Lithuania 5206.00 1880.33
## 45 United States 623186.00 1846.48
## 46 San Marino 62.00 1841.46
## 47 Montserrat 8.00 1822.74
## 48 France 112159.00 1737.27
## 49 Slovakia 8826.00 1622.59
## 50 United Arab Emirates 15242.00 1620.93
## 51 Palau 29.00 1606.56
## 52 Saudi Arabia 56864.00 1573.59
## 53 Latvia 2838.00 1525.00
## 54 Hungary 14163.00 1461.83
## 55 Poland 54711.00 1430.85
## 56 Serbia 9222.00 1269.97
## 57 Malaysia 42099.00 1247.33
## 58 Bahrain 1759.00 1200.17
## 59 Japan 130095.00 1046.80
## 60 Bulgaria 6903.00 1010.15
## 61 Romania 17698.00 918.77
## 62 Chile 17680.00 902.39
## 63 Anguilla 14.00 884.34
## 64 Bermuda 56.00 871.79
## 65 Seychelles 92.00 860.72
## 66 Oman 3909.00 860.39
## 67 Iran 75091.00 850.83
## 68 Montenegro 530.00 844.61
## 69 Jordan 9422.00 837.05
## 70 Tunisia 9835.00 799.03
## 71 Turkey 67256.00 790.39
## 72 Lebanon 4300.00 774.23
## 73 Kuwait 3083.00 725.80
## 74 Russian Federation 104433.00 721.56
## 75 Barbados 199.00 707.08
## 76 New Caledonia 200.00 693.45
## 77 China 985085.00 690.84
## 78 Uruguay 2077.00 606.85
## 79 Cook Islands 10.00 588.62
## 80 Bosnia and Herzegovina 1865.00 574.55
## 81 Macedonia 1200.00 571.83
## 82 Fiji 522.00 563.43
## 83 Andorra 44.00 552.15
## 84 Tokelau 1.00 537.35
## 85 Georgia 1989.00 529.93
## 86 South Africa 31037.00 520.35
## 87 Armenia 1343.00 482.53
## 88 Guam 77.00 449.81
## 89 Iraq 19464.00 442.40
## 90 Cayman Islands 30.00 438.64
## 91 French Polynesia 130.00 426.28
## 92 Antigua and Barbuda 39.00 417.17
## 93 Ukraine 17597.00 406.08
## 94 Brazil 86987.00 404.92
## 95 Egypt 42493.00 385.83
## 96 Saint Martin (Dutch) 12.00 379.28
## 97 Thailand 26865.00 375.00
## 98 Mauritius 470.00 361.78
## 99 Dominica 26.00 358.13
## 100 Ecuador 6268.00 350.06
## 101 Botswana 910.00 348.98
## 102 Samoa 75.00 339.78
## 103 French Guiana 101.00 335.81
## 104 Argentina 15083.00 332.30
## 105 Palestine 1723.00 331.99
## 106 Albania 939.00 329.72
## 107 Trinidad and Tobago 494.00 323.10
## 108 Bhutan 252.00 323.09
## 109 Morocco 11949.00 320.65
## 110 Kazakhstan 6012.00 311.58
## 111 Aruba 33.00 309.65
## 112 Curaçao 59.00 309.38
## 113 Colombia 15817.00 305.41
## 114 Costa Rica 1530.00 296.11
## 115 Belarus 2699.00 282.62
## 116 Tuvalu 3.00 266.26
## 117 Virgin Islands (British) 8.00 256.54
## 118 Namibia 645.00 253.02
## 119 Virgin Islands (U.S.) 25.00 250.40
## 120 Mongolia 834.00 247.23
## 121 Tonga 26.00 244.36
## 122 Marshall Islands 10.00 241.81
## 123 Nauru 3.00 237.93
## 124 Peru 8041.00 237.12
## 125 Mexico 30052.00 236.58
## 126 Maldives 123.00 234.32
## 127 Algeria 10387.00 233.19
## 128 Saint Vincent and the Grenadines 24.00 230.52
## 129 Panama 989.00 225.84
## 130 Bahamas 85.00 208.00
## 131 Guadeloupe 81.00 204.59
## 132 Azerbaijan 1948.00 188.58
## 133 Ghana 6200.00 187.00
## 134 Viet Nam 17707.00 180.97
## 135 Sri Lanka 3913.00 179.50
## 136 Cuba 2013.00 179.40
## 137 India 248644.00 176.05
## 138 Turks and Caicos Islands 8.00 175.71
## 139 Moldova 532.00 174.52
## 140 Pakistan 39589.00 169.53
## 141 Vanuatu 53.00 164.15
## 142 Martinique 60.00 163.15
## 143 Northern Mariana Islands 8.00 161.88
## 144 Belize 64.00 158.95
## 145 Indonesia 42431.00 154.51
## 146 Swaziland 182.00 151.99
## 147 Federated States of Micronesia 17.00 149.60
## 148 Libya 994.00 146.72
## 149 Jamaica 402.00 142.10
## 150 Cape Verde 80.00 135.48
## 151 Gabon 320.00 135.29
## 152 Gambia 293.00 109.62
## 153 Nepal 3308.00 108.92
## 154 Guyana 82.00 101.66
## 155 Uzbekistan 3462.00 100.78
## 156 Cameroon 2714.00 98.50
## 157 Saint Martin (French) 3.00 94.82
## 158 Saint Lucia 17.00 94.57
## 159 Ethiopia 11376.00 93.38
## 160 Kenya 4932.00 92.19
## 161 Kyrgyzstan 600.00 91.21
## 162 Wallis and Futuna 1.00 86.11
## 163 Kiribati 11.00 84.57
## 164 Zimbabwe 1358.00 84.07
## 165 Reunion 75.00 77.31
## 166 Suriname 47.00 76.34
## 167 Congo 437.00 74.04
## 168 Nigeria 15908.00 73.67
## 169 Bangladesh 12534.00 73.60
## 170 Rwanda 998.00 73.28
## 171 Sao Tome and Principe 16.00 71.06
## 172 Paraguay 471.00 69.85
## 173 Lesotho 158.00 68.87
## 174 Benin 855.00 64.90
## 175 Uganda 2934.00 62.98
## 176 Senegal 1026.00 60.01
## 177 Philippines 6621.00 57.74
## 178 Malawi 1156.00 57.38
## 179 Yemen 1897.00 56.93
## 180 Solomon Islands 40.00 55.85
## 181 Syrian Arab Republic 1177.00 54.52
## 182 Liberia 274.00 52.23
## 183 Zambia 1013.00 51.30
## 184 Bolivia 538.00 44.28
## 185 Venezuela 1208.00 43.07
## 186 Laos 311.00 41.59
## 187 Tanzania 2674.00 41.43
## 188 Burkina Faso 907.00 40.51
## 189 Cambodia 637.00 38.19
## 190 Timor-Leste 50.00 37.55
## 191 Sudan 1720.00 37.18
## 192 Djibouti 41.00 36.83
## 193 Togo 320.00 36.59
## 194 Dominican Republic 395.00 35.34
## 195 Honduras 358.00 34.57
## 196 Sierra Leone 292.00 34.30
## 197 El Salvador 216.00 34.16
## 198 Guinea-Bissau 58.00 27.84
## 199 Papua New Guinea 264.00 26.27
## 200 Tajikistan 257.00 26.07
## 201 Côte d’Ivoire 719.00 25.85
## 202 Guatemala 437.00 24.66
## 203 Eritrea 88.00 24.09
## 204 Nicaragua 155.00 22.47
## 205 Mozambique 706.00 21.71
## 206 Mauritania 92.00 19.68
## 207 Mali 403.00 18.11
## 208 Comoros 15.00 18.09
## 209 Mayotte 5.00 15.57
## 210 Afghanistan 625.00 15.40
## 211 Madagascar 445.00 15.21
## 212 Somalia 260.00 15.00
## 213 Central African Republic 78.00 14.18
## 214 North Korea 349.00 13.41
## 215 Haïti 154.00 13.37
## 216 Equatorial Guinea 21.00 12.69
## 217 Guinea 159.00 11.61
## 218 Myanmar 568.00 10.52
## 219 Burundi 120.00 9.44
## 220 Niger 240.00 9.33
## 221 Republic of South Sudan 99.00 9.14
## 222 Democratic Republic Congo 682.00 7.00
## 223 Chad 108.00 6.19
## 224 Angola 203.00 5.79
## 225 Turkmenistan 36.00 5.64
## 226 Western Sahara 1.00 1.75
## Total Population as of 1 January thousands
## 1 5.10e-01
## 2 3.66e+01
## 3 6.91e+02
## 4 3.77e+00
## 5 8.71e+03
## 6 3.72e+02
## 7 5.87e+03
## 8 5.41e+03
## 9 1.05e+04
## 10 6.44e+02
## 11 2.60e+04
## 12 5.54e+03
## 13 5.96e+03
## 14 7.49e+03
## 15 3.92e+01
## 16 1.75e+04
## 17 5.01e+03
## 18 1.25e+03
## 19 2.12e+03
## 20 5.16e+03
## 21 8.93e+03
## 22 1.03e+04
## 23 1.16e+04
## 24 1.33e+03
## 25 3.83e+04
## 26 6.74e+04
## 27 4.76e+01
## 28 5.30e+01
## 29 8.97e+03
## 30 5.64e+01
## 31 1.05e+04
## 32 4.47e+02
## 33 5.32e+02
## 34 4.76e+04
## 35 5.91e+04
## 36 1.04e+04
## 37 4.04e+03
## 38 2.68e+03
## 39 8.34e+04
## 40 3.26e+01
## 41 1.25e+02
## 42 5.18e+04
## 43 2.39e+04
## 44 2.77e+03
## 45 3.37e+05
## 46 3.37e+01
## 47 4.39e+00
## 48 6.46e+04
## 49 5.44e+03
## 50 9.40e+03
## 51 1.81e+01
## 52 3.61e+04
## 53 1.86e+03
## 54 9.69e+03
## 55 3.82e+04
## 56 7.26e+03
## 57 3.38e+04
## 58 1.47e+03
## 59 1.24e+05
## 60 6.83e+03
## 61 1.93e+04
## 62 1.96e+04
## 63 1.58e+01
## 64 6.42e+01
## 65 1.07e+02
## 66 4.54e+03
## 67 8.83e+04
## 68 6.28e+02
## 69 1.13e+04
## 70 1.23e+04
## 71 8.51e+04
## 72 5.55e+03
## 73 4.25e+03
## 74 1.45e+05
## 75 2.81e+02
## 76 2.88e+02
## 77 1.43e+06
## 78 3.42e+03
## 79 1.70e+01
## 80 3.25e+03
## 81 2.10e+03
## 82 9.26e+02
## 83 7.97e+01
## 84 1.86e+00
## 85 3.75e+03
## 86 5.96e+04
## 87 2.78e+03
## 88 1.71e+02
## 89 4.40e+04
## 90 6.84e+01
## 91 3.05e+02
## 92 9.35e+01
## 93 4.33e+04
## 94 2.15e+05
## 95 1.10e+05
## 96 3.16e+01
## 97 7.16e+04
## 98 1.30e+03
## 99 7.26e+01
## 100 1.79e+04
## 101 2.61e+03
## 102 2.21e+02
## 103 3.01e+02
## 104 4.54e+04
## 105 5.19e+03
## 106 2.85e+03
## 107 1.53e+03
## 108 7.80e+02
## 109 3.73e+04
## 110 1.93e+04
## 111 1.07e+02
## 112 1.91e+02
## 113 5.18e+04
## 114 5.17e+03
## 115 9.55e+03
## 116 1.13e+01
## 117 3.12e+01
## 118 2.55e+03
## 119 9.98e+01
## 120 3.37e+03
## 121 1.06e+02
## 122 4.14e+01
## 123 1.26e+01
## 124 3.39e+04
## 125 1.27e+05
## 126 5.25e+02
## 127 4.45e+04
## 128 1.04e+02
## 129 4.38e+03
## 130 4.09e+02
## 131 3.96e+02
## 132 1.03e+04
## 133 3.32e+04
## 134 9.78e+04
## 135 2.18e+04
## 136 1.12e+04
## 137 1.41e+06
## 138 4.55e+01
## 139 3.05e+03
## 140 2.34e+05
## 141 3.23e+02
## 142 3.68e+02
## 143 4.94e+01
## 144 4.03e+02
## 145 2.75e+05
## 146 1.20e+03
## 147 1.14e+02
## 148 6.77e+03
## 149 2.83e+03
## 150 5.90e+02
## 151 2.37e+03
## 152 2.67e+03
## 153 3.04e+04
## 154 8.07e+02
## 155 3.44e+04
## 156 2.76e+04
## 157 3.16e+01
## 158 1.80e+02
## 159 1.22e+05
## 160 5.35e+04
## 161 6.58e+03
## 162 1.16e+01
## 163 1.30e+02
## 164 1.62e+04
## 165 9.70e+02
## 166 6.16e+02
## 167 5.90e+03
## 168 2.16e+05
## 169 1.70e+05
## 170 1.36e+04
## 171 2.25e+02
## 172 6.74e+03
## 173 2.29e+03
## 174 1.32e+04
## 175 4.66e+04
## 176 1.71e+04
## 177 1.15e+05
## 178 2.01e+04
## 179 3.33e+04
## 180 7.16e+02
## 181 2.16e+04
## 182 5.25e+03
## 183 1.97e+04
## 184 1.21e+04
## 185 2.80e+04
## 186 7.48e+03
## 187 6.45e+04
## 188 2.24e+04
## 189 1.67e+04
## 190 1.33e+03
## 191 4.63e+04
## 192 1.11e+03
## 193 8.75e+03
## 194 1.12e+04
## 195 1.04e+04
## 196 8.51e+03
## 197 6.32e+03
## 198 2.08e+03
## 199 1.00e+04
## 200 9.86e+03
## 201 2.78e+04
## 202 1.77e+04
## 203 3.65e+03
## 204 6.90e+03
## 205 3.25e+04
## 206 4.67e+03
## 207 2.22e+04
## 208 8.29e+02
## 209 3.21e+02
## 210 4.06e+04
## 211 2.93e+04
## 212 1.73e+04
## 213 5.50e+03
## 214 2.60e+04
## 215 1.15e+04
## 216 1.66e+03
## 217 1.37e+04
## 218 5.40e+04
## 219 1.27e+04
## 220 2.57e+04
## 221 1.08e+04
## 222 9.74e+04
## 223 1.74e+04
## 224 3.50e+04
## 225 6.39e+03
## 226 5.70e+02
# aggregated results
# Custom region coding. case_when() uses the first rule that matches, so the
# country-specific rules must come before the catch-all region rules; rows
# matching no rule keep their original scimagojr region.
scimagojr_pc = scimagojr_pc %>%
  mutate(
    Region_emil = case_when(
      Country %in% c("Denmark", "Sweden", "Norway", "Iceland", "Finland") ~ "Nordic",
      Country %in% c("United Kingdom", "Canada", "New Zealand", "Australia") ~ "Other Anglos",
      Region %in% "Western Europe" ~ "Rest of Western Europe",
      Country == "United States" ~ "USA",
      Country == "China" ~ "China",
      Country == "Saint Pierre and Miquelon" ~ "Latin America",
      Country %in% c("Japan", "South Korea") ~ "Japan & South Korea",
      Country == "India" ~ "India",
      Region %in% "Asiatic Region" ~ "Rest of Asia",
      .default = Region
    )
  )

# row counts per coded region
table2(scimagojr_pc$Region_emil)

# Sum population and documents within each region-year, then recompute the
# per-million rate from the sums.
scimagojr_pc_aggr = scimagojr_pc %>%
  plyr::ddply(c("Region_emil", "year"), function(cell) {
    population_k = sum(cell$Total_Population_as_of_1_January_thousands)
    documents = sum(cell$Citable_documents)

    tibble(
      pop_k = population_k,
      Citable_documents = documents,
      Citable_documents_p1M = documents / (population_k / 1000)
    )
  }) %>%
  # year has been character up to here; the plot needs a numeric axis
  mutate(year = as.numeric(year))
# plot: one line per region, labelled directly at its 2022 value instead of
# through a colour legend
scimagojr_pc_aggr %>%
  ggplot(
    aes(
      x = year,
      y = Citable_documents_p1M,
      color = Region_emil,
      group = Region_emil
    )
  ) +
  geom_line() +
  geom_text_repel(
    data = scimagojr_pc_aggr %>% filter(year == 2022),
    mapping = aes(label = Region_emil, x = year, y = Citable_documents_p1M),
    vjust = "bottom",
    min.segment.length = Inf,
    max.overlaps = 20
  ) +
  scale_x_continuous(limits = c(NA, 2023)) +
  scale_y_continuous(name = "Academic publications per 1M residents") +
  scale_color_discrete(name = "Region / Country", guide = NULL) +
  labs(title = "Scientific productivity by region / country")

GG_save("figs/timeline_productivity.png")
# Absolute Nature Index count per country from 2016 onwards; each line is
# labelled at its 2022 value and the colour legend is suppressed.
nature_index_pc %>%
  filter(year >= 2016) %>%
  ggplot(
    aes(x = year, y = nature_count, color = country, group = country)
  ) +
  geom_line() +
  geom_text_repel(
    data = nature_index_pc %>% filter(year == 2022),
    mapping = aes(label = country, x = year, y = nature_count),
    vjust = "bottom",
    max.overlaps = 20
  ) +
  scale_x_continuous(limits = c(NA, 2023)) +
  scale_y_continuous(name = "Nature count index") +
  scale_color_discrete(guide = NULL) +
  labs(title = "Nature count index by country")
## Warning: ggrepel: 38 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/nature count index absolute.png")
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Nature Index count relative to population, per country, from 2016 onwards;
# each line is labelled at its 2022 value and the colour legend is suppressed.
nature_index_pc %>%
  filter(year >= 2016) %>%
  ggplot(aes(year, nature_count_pc, color = country, group = country)) +
  geom_line() +
  # nature_count_pc is the count divided by population in millions, so the
  # axis must not reuse the label of the absolute-count plot
  scale_y_continuous("Nature count index, per million residents") +
  scale_x_continuous(limits = c(NA, 2023)) +
  scale_color_discrete(guide = NULL) +
  geom_text_repel(
    data = nature_index_pc %>% filter(year == 2022),
    mapping = aes(label = country, x = year, y = nature_count_pc),
    vjust = "bottom",
    max.overlaps = 20
  ) +
  ggtitle("Nature count index per capita by country")
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/nature count index pc.png")
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# region codings
# Group the Nature Index countries into regions. Countries not listed in any
# rule keep their own name as their "region".
nature_index_pc = nature_index_pc %>%
  mutate(
    Region_emil = case_when(
      country %in% c("Denmark", "Sweden", "Norway", "Iceland", "Finland") ~ "Nordic",
      country %in% c("United Kingdom (UK)", "Canada", "New Zealand", "Australia") ~ "Other Anglos",
      country %in% c("Austria", "Germany", "Switzerland") ~ "German Europe",
      country %in% c("Spain", "Portugal", "Italy", "Greece", "Cyprus") ~ "Southern Europe",
      country %in% c("France", "Belgium", "Luxembourg", "Netherlands", "Ireland") ~ "Rest of Western Europe",
      # label spelling fixed ("Southest" -> "Southeast"); it is shown in the figure
      country %in% c("Russia", "Poland", "Czech Republic", "Ukraine", "Slovenia", "Croatia", "Estonia", "Slovakia", "Romania", "Serbia", "Hungary") ~ "Southeast & East Europe",
      country %in% c("Pakistan", "Saudi Arabia", "Turkey", "Iran", "United Arab Emirates") ~ "MENAPT",
      country == "United States of America (USA)" ~ "USA",
      country %in% c("Chile", "Argentina", "Brazil", "Mexico", "Colombia") ~ "Latin America",
      country %in% c("Japan", "South Korea") ~ "Japan & South Korea",
      country %in% c("Vietnam", "Thailand") ~ "Other Asian",
      .default = country
    )
  )

# row counts per coded region, all rows printed
nature_index_pc$Region_emil %>% table2() %>% print(n = Inf)
## # A tibble: 18 × 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 Southest & East Europe 72 18
## 2 Latin America 40 10
## 3 Nordic 40 10
## 4 Rest of Western Europe 38 9.5
## 5 MENAPT 36 9
## 6 Southern Europe 34 8.5
## 7 Other Anglos 32 8
## 8 German Europe 24 6
## 9 Japan & South Korea 16 4
## 10 Other Asian 12 3
## 11 China 8 2
## 12 India 8 2
## 13 Israel 8 2
## 14 Singapore 8 2
## 15 South Africa 8 2
## 16 Taiwan 8 2
## 17 USA 8 2
## 18 <NA> 0 0
# Sum population and Nature Index count within each region-year, then
# recompute the per-million rate from the sums.
nature_index_pc_aggr = nature_index_pc %>%
  plyr::ddply(c("Region_emil", "year"), function(cell) {
    population_k = sum(cell$Total_Population_as_of_1_January_thousands)
    count_total = sum(cell$nature_count)

    tibble(
      pop_k = population_k,
      nature_count = count_total,
      nature_count_p1M = count_total / (population_k / 1000)
    )
  }) %>%
  mutate(year = as.numeric(year))
# Region-level Nature Index count per million residents from 2016 onwards;
# every region is labelled at its 2022 value (no overlap limit, no legend).
nature_index_pc_aggr %>%
  filter(year >= 2016) %>%
  ggplot(
    aes(
      x = year,
      y = nature_count_p1M,
      color = Region_emil,
      group = Region_emil
    )
  ) +
  geom_line() +
  geom_text_repel(
    data = nature_index_pc_aggr %>% filter(year == 2022),
    mapping = aes(label = Region_emil, x = year, y = nature_count_p1M),
    vjust = "bottom",
    max.overlaps = Inf
  ) +
  scale_x_continuous(limits = c(NA, 2023)) +
  scale_y_continuous(name = "Nature count index, per million residents") +
  scale_color_discrete(guide = NULL) +
  labs(title = "Nature count index per capita by country/region")

GG_save("figs/nature count index pc aggregated.png")
# GDP data: World Bank GDP per capita CSV; column names are made syntactic and
# the country code column is renamed to ISO so it can be joined on.
gdp = "data/gdp-per-capita-worldbank/gdp-per-capita-worldbank.csv" %>%
  read_csv() %>%
  df_legalize_names() %>%
  rename(ISO = Code)
## Rows: 6562 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, GDP per capita, PPP (constant 2017 international $)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# merge: 2022 publications per million and 2022 GDP per capita, one row per
# ISO code; a full join keeps codes that appear in only one of the two sources
d = full_join(
  scimagojr_pc %>% filter(year == 2022) %>% select(ISO, Citable_documents_p1M),
  gdp %>% filter(Year == 2022) %>% select(ISO, GDP_per_capita_PPP_constant_2017_international),
  by = "ISO"
) %>% mutate(
  # translate the codes back to names for the plot labels
  # (TRUE rather than T, which is an ordinary variable that can be reassigned)
  name = pu_translate(ISO, reverse = TRUE)
)
## No match: OWID_KOS
## No match: OWID_WRL
# plot: publications per million against GDP per capita, points labelled by
# country name, with an additional default geom_smooth() layer on top
GG_scatter(
  d,
  "GDP_per_capita_PPP_constant_2017_international",
  "Citable_documents_p1M",
  case_names = "name"
) +
  geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# versions: record the R version, platform, locale and package versions used
# for this run
write_sessioninfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggrepel_0.9.5 googlesheets4_1.1.1 readxl_1.4.3
## [4] kirkegaard_2024-11-25 psych_2.4.6.26 assertthat_0.2.1
## [7] weights_1.0.4 Hmisc_5.1-3 magrittr_2.0.3
## [10] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [13] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
## [16] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
## [19] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] mnormt_2.1.1 gridExtra_2.3 rlang_1.1.4 compiler_4.4.2
## [5] mgcv_1.9-1 gdata_3.0.0 systemfonts_1.1.0 vctrs_0.6.5
## [9] crayon_1.5.3 pkgconfig_2.0.3 shape_1.4.6.1 fastmap_1.2.0
## [13] backports_1.5.0 labeling_0.4.3 utf8_1.2.4 rmarkdown_2.28
## [17] tzdb_0.4.0 nloptr_2.1.1 ragg_1.3.2 bit_4.0.5
## [21] xfun_0.47 glmnet_4.1-8 jomo_2.7-6 cachem_1.1.0
## [25] jsonlite_1.8.8 highr_0.11 pan_1.9 broom_1.0.6
## [29] parallel_4.4.2 cluster_2.1.8 R6_2.5.1 bslib_0.8.0
## [33] stringi_1.8.4 boot_1.3-31 rpart_4.1.23 jquerylib_0.1.4
## [37] cellranger_1.1.0 Rcpp_1.0.13 iterators_1.0.14 knitr_1.48
## [41] base64enc_0.1-3 Matrix_1.7-1 splines_4.4.2 nnet_7.3-19
## [45] timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.16.0 stringdist_0.9.12
## [49] yaml_2.3.10 codetools_0.2-19 curl_5.2.2 lattice_0.22-5
## [53] plyr_1.8.9 withr_3.0.1 evaluate_0.24.0 foreign_0.8-87
## [57] survival_3.7-0 pillar_1.9.0 mice_3.16.0 checkmate_2.3.2
## [61] foreach_1.5.2 generics_0.1.3 vroom_1.6.5 hms_1.1.3
## [65] munsell_0.5.1 scales_1.3.0 minqa_1.2.8 gtools_3.9.5
## [69] glue_1.7.0 tools_4.4.2 data.table_1.16.0 lme4_1.1-35.5
## [73] fs_1.6.4 grid_4.4.2 colorspace_2.1-1 nlme_3.1-166
## [77] htmlTable_2.4.3 googledrive_2.1.1 Formula_1.2-5 cli_3.6.3
## [81] textshaping_0.4.0 fansi_1.0.6 gargle_1.5.2 gtable_0.3.5
## [85] sass_0.4.9 digest_0.6.37 htmlwidgets_1.6.4 farver_2.1.2
## [89] htmltools_0.5.8.1 lifecycle_1.0.4 httr_1.4.7 mitml_0.4-5
## [93] bit64_4.0.5 MASS_7.3-61
# OSF upload. Disabled on purpose: the block never runs unless the condition is
# changed by hand.
if (FALSE) {
  library(osfr)

  # login with the personal access token stored outside the project
  osf_auth(readr::read_lines("~/.config/osf_token"))

  # the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")

  # upload all files in project
  # overwrite existing (versioning)
  # "figs" is the folder the GG_save() calls in this script write to
  osf_upload(
    osf_proj,
    path = c("data", "figs", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
    conflicts = "overwrite"
  )
}