This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

Aufgaben

1) Bitte in R die folgenden beiden Datensätze laden:

  • 20177113_DS_D3.1-SEAS2-dataset-2022_rev1.csv
  • WPP2022_POPULATION_1990-2030.csv
library(dplyr)
library(readr)
library(rsconnect)
# Load the two input tables. read_csv2() reads ";"-separated files that use
# "," as the decimal mark.
# The UN population file is looked up in the working directory first so the
# document also runs on other machines; the original absolute path is kept as
# a fallback.
un_candidates <- c(
  "WPP2022_POPULATION_1990-2030.csv",
  "/Users/timhirschfeld/Desktop/WPP2022_POPULATION_1990-2030.csv"
)
un_file <- un_candidates[file.exists(un_candidates)][1]
if (is.na(un_file)) {
  stop(
    "WPP2022_POPULATION_1990-2030.csv not found; ",
    "place it in the working directory.",
    call. = FALSE
  )
}
un <- read_csv2(un_file)
# The DS dataset is read straight from its URL.
ds <- read_csv2("https://www.deep-seas.eu/wp-content/uploads/2023/03/20177113_DS_D3.1-SEAS2-dataset-2022_rev1.csv")

2) In den DS-Daten soll die Zahl der Personen (respondents) nach Land, Altersgruppe und Geschlecht aggregiert (gezählt) werden und in einem neuen Objekt „AGG“ (z. B. data.frame) abgespeichert werden.

# The survey's age groups are not comparable with the UN data, so the groups
# are recomputed from SD_2, i.e. 20-34 instead of 18-34.
# Values of SD_2 outside 20-64 (and missing values) keep groups = NA.
ds <- ds %>%
  mutate(
    groups = case_when(
      SD_2 >= 20 & SD_2 <= 34 ~ 1,
      SD_2 >= 35 & SD_2 <= 44 ~ 2,
      SD_2 >= 45 & SD_2 <= 64 ~ 3
    )
  )

# Count respondents per COUNTRY x age group x SD_1 stratum.
# The count column keeps the name `n()` because the later steps read it under
# that name. .groups = "drop" returns an ungrouped table and avoids the
# regrouping message that summarize() would otherwise print.
AGG <- ds %>%
  group_by(COUNTRY, groups, SD_1) %>%
  summarize(`n()` = n(), .groups = "drop")

3) Für jedes Land-Altersgruppe-Geschlechtsstratum soll die aggregierte Zahl der Personen mit der zugrundeliegenden Population verbunden werden. Hierfür bitte das neue Objekt „AGG“ mit den Populationsdaten verbinden (z.B. mit merge function). Die Populationsdaten müssen dafür entsprechend formatiert werden und enthalten keine Informationen für Katalonien (nur für Länder im engeren Sinne).

# Replace the numeric country codes in AGG$COUNTRY with country names.
# The column becomes character; codes without an entry below keep their code
# as text.
# NOTE(review): only codes 10-35 are mapped in this section - confirm whether
# the remaining codes are labelled elsewhere.
AGG$COUNTRY <- local({
  country_names <- c(
    "10" = "Finland",
    "11" = "France",
    "12" = "Germany",
    "13" = "Greece",
    "14" = "Hungary",
    "15" = "Iceland",
    "16" = "Ireland",
    "17" = "Italy",
    "18" = "Latvia",
    "19" = "Lithuania",
    "20" = "Luxembourg",
    "21" = "Malta",
    "22" = "Moldova",
    "23" = "Netherlands",
    "24" = "Norway",
    "25" = "Poland",
    "26" = "Portugal",
    "27" = "Romania",
    "28" = "Serbia",
    "29" = "Slovakia",
    "30" = "Slovenia",
    "31" = "Spain",
    "32" = "Sweden",
    "33" = "United Kingdom",
    "34" = "Other Europe",
    "35" = "Turkey"
  )
  codes <- as.character(AGG$COUNTRY)
  # Only touch rows whose code has a label; everything else stays as it is.
  is_mapped <- codes %in% names(country_names)
  codes[is_mapped] <- unname(country_names[codes[is_mapped]])
  codes
})


# Prepare merging: give AGG the same key columns (country, sex) as the UN
# table and copy the respondent count into `subpop`.
AGG$country <- AGG$COUNTRY
AGG$subpop <- AGG[["n()"]]
AGG$sex <- local({
  # "other" is not included in the UN data, but could be shown as missing
  # values somehow.
  sex_labels <- c("1" = "male", "2" = "female", "3" = "other")
  sex_codes <- as.character(AGG$SD_1)
  # Codes without a label keep their code as text.
  is_labelled <- sex_codes %in% names(sex_labels)
  sex_codes[is_labelled] <- unname(sex_labels[sex_codes[is_labelled]])
  sex_codes
})

# Map the UN five-year age bands onto the three groups used for the survey
# data (1 = 20-34, 2 = 35-44, 3 = 45-64); every other band stays NA.
# pop is converted to numeric in the same step.
un <- un %>%
  mutate(
    groups = case_when(
      AgeGrp %in% c("20-24", "25-29", "30-34") ~ 1,
      AgeGrp %in% c("35-39", "40-44") ~ 2,
      AgeGrp %in% c("45-49", "50-54", "55-59", "60-64") ~ 3
    ),
    pop = as.numeric(pop)
  )

# Population for 2022 per country x age group x sex.
# pop is multiplied by 1000 (presumably the file reports thousands - confirm).
# na.omit() removes rows containing NA, e.g. age bands outside the three
# groups.
uno <- un %>%
  filter(year == 2022, sex %in% c("female", "male")) %>%
  select(country, year, groups, sex, pop) %>%
  mutate(pop = pop * 1000) %>%
  group_by(country, groups, sex) %>%
  summarise(sum_pop = sum(pop), .groups = "drop") %>%
  na.omit()

# Merge population totals with the survey counts on the shared stratum keys.
# With its default settings merge() keeps only strata found in both tables.
# The raw code columns are dropped afterwards, since country, sex and subpop
# carry the same information.
dat <- merge(x = uno, y = AGG, by = c("country", "groups", "sex")) %>%
  select(-COUNTRY, -SD_1, -`n()`)

4) Bitte für jedes Stratum den Anteil der Bevölkerung berechnen, die beim DS teilgenommen hat (Personen / Population).

library(DT)

# Share of each stratum's population that took part in the survey
# (respondents / population).
# No group_by() is used: the ratios are computed row by row, and grouping
# would only leave `dat` grouped for everything that follows.
dat <- dat %>%
  mutate(
    subpop = as.numeric(subpop),
    part_percent = subpop / sum_pop * 100, # in percent
    part_100.000 = part_percent * 1000 # percent * 1000 = per 100,000
  )

# Interactive table of the results.
datatable(dat)

5) Die Ergebnisse nach eigenen Überlegungen (grafisch) darstellen.

# NOTE(review): ggplot2 is not loaded explicitly in the visible code;
# presumably it is attached together with plotly - confirm.
library(plotly)

# Turn the numeric group codes into labelled age bands for the axis.
dat$groups <- factor(
  dat$groups,
  levels = c(1, 2, 3),
  labels = c("20-34", "35-44", "45-64")
)

# Dodged bars by sex for every age band, flipped to horizontal, with one
# facet row per country.
fig <- ggplot(dat, aes(x = groups, y = part_100.000)) +
  geom_col(aes(fill = sex), position = "dodge") +
  coord_flip() +
  theme_minimal() +
  scale_y_continuous() +
  ggtitle("Nr. of participants per country according to age and sex") +
  labs(y = "Nr. of Participants per 100.000", x = "Age Groups") +
  facet_wrap(~country, ncol = 1)

# Render the static ggplot as an interactive plotly widget.
ggplotly(fig)