Apply the following dplyr verbs to your data
Filter rows
filter(data, region == "Europe & Central Asia", year == 2023)
## # A tibble: 58 × 12
## iso3c country region income year population overall_score data_use_score
## <chr> <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 DNK Denmark Europ… High … 2023 5946952 95.255833333… 100
## 2 FIN Finland Europ… High … 2023 5584264 95.115416666… 100
## 3 POL Poland Europ… High … 2023 36685849 94.65375 100
## 4 SWE Sweden Europ… High … 2023 10536632 94.41 100
## 5 ESP Spain Europ… High … 2023 48373336 94.325 100
## 6 NLD Netherlands Europ… High … 2023 17879488 94.3225 100
## 7 SVN Slovenia Europ… High … 2023 2120937 94.220416666… 100
## 8 PRT Portugal Europ… High … 2023 10525347 93.778333333… 100
## 9 ITA Italy Europ… High … 2023 58761146 93.638750000… 100
## 10 NOR Norway Europ… High … 2023 5519594 93.556666666… 100
## # ℹ 48 more rows
## # ℹ 4 more variables: data_services_score <chr>, data_products_score <chr>,
## # data_sources_score <chr>, data_infrastructure_score <chr>
Arrange rows
arrange(data, desc(population))
## # A tibble: 4,340 × 12
## iso3c country region income year population overall_score data_use_score
## <chr> <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 IND India South Asia Lower… 2023 1428627663 73.628333333… 80
## 2 IND India South Asia Lower… 2022 1417173173 74.600833333… 80
## 3 CHN China East Asia… Upper… 2021 1412360000 58.978750000… 83.4
## 4 CHN China East Asia… Upper… 2022 1412175000 56.5275 73.4
## 5 CHN China East Asia… Upper… 2020 1411100000 57.8075 83.4
## 6 CHN China East Asia… Upper… 2023 1410710000 59.50375 73.4
## 7 CHN China East Asia… Upper… 2019 1407745000 59.885 83.4
## 8 IND India South Asia Lower… 2021 1407563842 77.087916666… 80
## 9 CHN China East Asia… Upper… 2018 1402760000 56.369583333… 83.4
## 10 IND India South Asia Lower… 2020 1396387127 73.148749999… 80
## # ℹ 4,330 more rows
## # ℹ 4 more variables: data_services_score <chr>, data_products_score <chr>,
## # data_sources_score <chr>, data_infrastructure_score <chr>
Select columns
select(data, country, region, overall_score)
## # A tibble: 4,340 × 3
## country region overall_score
## <chr> <chr> <chr>
## 1 Denmark Europe & Central Asia 95.25583333333334
## 2 Finland Europe & Central Asia 95.11541666666668
## 3 Poland Europe & Central Asia 94.65375
## 4 Sweden Europe & Central Asia 94.41
## 5 Spain Europe & Central Asia 94.325
## 6 Netherlands Europe & Central Asia 94.3225
## 7 Slovenia Europe & Central Asia 94.22041666666668
## 8 Portugal Europe & Central Asia 93.77833333333332
## 9 Italy Europe & Central Asia 93.63875000000002
## 10 Norway Europe & Central Asia 93.55666666666669
## # ℹ 4,330 more rows
Add columns
mutate(data,
overall_score = as.numeric(overall_score),
population = as.numeric(population),
score_per_million = overall_score / (population / 1e6)
)
## # A tibble: 4,340 × 13
## iso3c country region income year population overall_score data_use_score
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 DNK Denmark Europ… High … 2023 5946952 95.3 100
## 2 FIN Finland Europ… High … 2023 5584264 95.1 100
## 3 POL Poland Europ… High … 2023 36685849 94.7 100
## 4 SWE Sweden Europ… High … 2023 10536632 94.4 100
## 5 ESP Spain Europ… High … 2023 48373336 94.3 100
## 6 NLD Netherlands Europ… High … 2023 17879488 94.3 100
## 7 SVN Slovenia Europ… High … 2023 2120937 94.2 100
## 8 PRT Portugal Europ… High … 2023 10525347 93.8 100
## 9 ITA Italy Europ… High … 2023 58761146 93.6 100
## 10 NOR Norway Europ… High … 2023 5519594 93.6 100
## # ℹ 4,330 more rows
## # ℹ 5 more variables: data_services_score <chr>, data_products_score <chr>,
## # data_sources_score <chr>, data_infrastructure_score <chr>,
## # score_per_million <dbl>
Summarize by groups
data %>%
# Ensure overall_score is numeric
mutate(overall_score = as.numeric(overall_score)) %>%
# Group by region
group_by(country) %>%
# Calculate average overall score
summarise(avg_score = mean(overall_score, na.rm = TRUE)) %>%
# Sort it
arrange(desc(avg_score))
## # A tibble: 217 × 2
## country avg_score
## <chr> <dbl>
## 1 Finland 90.9
## 2 Sweden 90.7
## 3 Slovenia 90.2
## 4 Italy 90.0
## 5 Poland 89.4
## 6 Spain 89.4
## 7 United States 89.3
## 8 Canada 89.1
## 9 Denmark 88.8
## 10 Germany 88.7
## # ℹ 207 more rows