This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
library(dplyr)
library(readr)
library(rsconnect)
# Import the two input tables. read_csv2() reads semicolon-separated files
# that use a comma as the decimal mark.
# NOTE(review): the population table is read from an absolute path on one
# machine's desktop; the script fails wherever that file is not present at
# exactly this location.
un <- read_csv2(
  file = "/Users/timhirschfeld/Desktop/WPP2022_POPULATION_1990-2030.csv"
)
# Second table (`ds`), fetched from its URL on every run.
ds <- read_csv2(
  file = "https://www.deep-seas.eu/wp-content/uploads/2023/03/20177113_DS_D3.1-SEAS2-dataset-2022_rev1.csv"
)
# The original age groups are not comparable with the UN data, so the groups
# are recomputed here (i.e. 20-34 instead of 18-34).
# Band codes: 1 = 20-34, 2 = 35-44, 3 = 45-64. Rows whose SD_2 value is
# missing or falls outside every band get NA.
# NOTE(review): SD_2 is presumably the respondent's age in years - confirm
# against the codebook.
ds$groups <- case_when(
  ds$SD_2 >= 20 & ds$SD_2 <= 34 ~ 1,
  ds$SD_2 >= 35 & ds$SD_2 <= 44 ~ 2,
  ds$SD_2 >= 45 & ds$SD_2 <= 64 ~ 3
)
# Count rows per country code, age band and SD_1 value.
# The count column deliberately keeps the literal name `n()`, because later
# steps of this script look it up under exactly that name.
# `.groups = "drop"` returns an ungrouped table: without it the result stays
# grouped by COUNTRY and groups, which prints a regrouping message and makes
# every later assignment to AGG$COUNTRY rebuild the grouping.
AGG <- ds %>%
  group_by(COUNTRY, groups, SD_1) %>%
  summarise(`n()` = n(), .groups = "drop")
# Replace the numeric country codes in AGG$COUNTRY with country names.
# The column becomes character; a code that has no entry in the lookup below
# is kept as its number, stored as text.
# NOTE(review): only codes 10-35 are labelled here - confirm against the
# codebook whether lower codes exist and should receive a name as well.
country_labels <- c(
  "10" = "Finland",
  "11" = "France",
  "12" = "Germany",
  "13" = "Greece",
  "14" = "Hungary",
  "15" = "Iceland",
  "16" = "Ireland",
  "17" = "Italy",
  "18" = "Latvia",
  "19" = "Lithuania",
  "20" = "Luxembourg",
  "21" = "Malta",
  "22" = "Moldova",
  "23" = "Netherlands",
  "24" = "Norway",
  "25" = "Poland",
  "26" = "Portugal",
  "27" = "Romania",
  "28" = "Serbia",
  "29" = "Slovakia",
  "30" = "Slovenia",
  "31" = "Spain",
  "32" = "Sweden",
  "33" = "United Kingdom",
  "34" = "Other Europe",
  "35" = "Turkey"
)
country_code <- as.character(AGG$COUNTRY)
# Look up the name for each code; fall back to the code itself when unknown.
AGG$COUNTRY <- coalesce(unname(country_labels[country_code]), country_code)
# Prepare merging: give AGG the same key columns (country, groups, sex) that
# the UN table is merged on, plus the participant count under a plain name.
AGG$country <- AGG$COUNTRY
AGG$subpop <- AGG$`n()`
# SD_1 codes 1/2/3 become "male"/"female"/"other"; any other value is kept as
# its number, stored as text. "other" is not included in the UN data, but
# could be shown as missing somehow.
sex_labels <- c("1" = "male", "2" = "female", "3" = "other")
sex_code <- as.character(AGG$SD_1)
AGG$sex <- coalesce(unname(sex_labels[sex_code]), sex_code)
# Map the UN five-year age groups onto the same three band codes used for the
# survey data: 1 = 20-34, 2 = 35-44, 3 = 45-64.
# Any AgeGrp value that is not listed (or is missing) yields NA.
age_band_of <- c(
  "20-24" = 1, "25-29" = 1, "30-34" = 1,
  "35-39" = 2, "40-44" = 2,
  "45-49" = 3, "50-54" = 3, "55-59" = 3, "60-64" = 3
)
un$groups <- unname(age_band_of[as.character(un$AgeGrp)])
# Coerce the population column to numeric in place before doing arithmetic.
un$pop <- as.numeric(un$pop)

# Total 2022 population per country, age band and sex (female/male only).
# pop is scaled by 1000 before summing - presumably the UN file reports
# thousands of persons; TODO confirm.
# na.omit() then drops every row containing an NA, which includes the rows
# whose age group was not assigned to a band.
uno <- un %>%
  filter(year == 2022, sex %in% c("female", "male")) %>%
  group_by(country, groups, sex) %>%
  summarise(sum_pop = sum(pop * 1000), .groups = "drop") %>%
  na.omit()
# Merge population totals with participant counts on country, age band and
# sex, drop the raw helper columns and make sure the count is numeric.
# NOTE(review): merge() keeps only rows whose three keys match exactly in both
# tables, so AGG rows with sex "other", an NA age band, or a country label
# spelled differently from the UN table are silently dropped - confirm this
# is intended.
dat <- merge(uno, AGG, by = c("country", "groups", "sex")) %>%
  select(-COUNTRY, -SD_1, -`n()`) %>%
  mutate(subpop = as.numeric(subpop))
# Participation rate of each country / age band / sex cell relative to the
# matching population total.
# Both columns are plain row-wise arithmetic, so no grouping is needed; the
# earlier group_by() changed no value and only left `dat` grouped for
# everything that followed.
dat <- dat %>%
  mutate(
    part_percent = (subpop / sum_pop) * 100, # in percent
    part_100.000 = part_percent * 1000 # per 100.000
  )
library(DT)
# Show the merged table as an interactive HTML table.
dat %>%
  datatable()
library(plotly)
# Turn the numeric age-band codes into a labelled factor so the plot shows
# readable categories in a fixed order. Base assignment is used on purpose:
# it also works when `dat` is grouped by this column.
age_band_labels <- c("20-34", "35-44", "45-64")
dat$groups <- factor(dat$groups, levels = 1:3, labels = age_band_labels)

# Dodged horizontal bars (one per sex) of participants per 100.000 for every
# age band, with one facet per country stacked in a single column.
fig <- ggplot(dat, aes(x = groups, y = part_100.000)) +
  geom_col(aes(fill = sex), position = "dodge") +
  facet_wrap(~country, ncol = 1) +
  coord_flip() +
  scale_y_continuous() +
  theme_minimal() +
  labs(
    title = "Nr. of participants per country according to age and sex",
    x = "Age Groups",
    y = "Nr. of Participants per 100.000"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(fig)