library(readxl)
library(ggplot2)
library(reshape2) 
library(data.table)
library(RColorBrewer)
library(gapminder)
library(plotly)
library(stringr)
library(xtable)
library(dplyr)

## Graph 1 - Income distribution over time

# Load the income workbook from its fixed location on disk.
Income <- read_excel(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/Income.xlsx"
)

# Keep complete rows only, retain the three columns of interest under shorter
# names, and keep the rows whose "Year and decile" value converts to a number
# below 11. Decile is finally turned into a factor so that it is mapped as a
# discrete variable in the plot.
Income_3 <- Income %>%
  na.omit() %>%
  select(
    Index,
    Decile = "Year and decile",
    Income = "Wage and business income"
  ) %>%
  mutate(Decile = as.numeric(Decile)) %>%
  filter(Decile < 11) %>%
  mutate(Decile = as.factor(Decile))

# One line (with points) per decile, Index on the x axis.
plot_1 <- ggplot(
  Income_3,
  aes(x = Index, y = Income, group = Decile, color = Decile)
) +
  geom_line() +
  geom_point() +
  labs(x = "Time", y = "Income distribution by decile") +
  scale_color_brewer(palette = "BrBG") +
  theme(
    axis.line = element_line(size = 1, colour = "gray", linetype = 2),
    panel.background = element_rect(fill = "white")
  )

# Interactive version of the chart.
ggplotly(plot_1)
# Graph 7 - Unemployment by education and age

# Cells containing ":" are read as missing; the first 10 rows of the sheet are
# skipped before the header.
Unemployment_by_degree <- read_excel(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/Unemployment_by_degree.xls",
  skip = 10,
  na = ":"
)

# Reshape to long format (no id.vars are passed, so melt() selects them
# itself), keep the four needed columns under their final names and drop
# every row that still contains a missing value.
Unemployment_by_degree_long <- Unemployment_by_degree %>%
  reshape2::melt() %>%
  select(
    Agegroup,
    `Educationnal degree`,
    TIME = variable,
    UEMPLOYMENT = value
  ) %>%
  na.omit()
## Graph 2 - Job vacancy over time

vacancies_data <- read_excel(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/vacancies.xlsx"
)

# Long format: the TIME_1 column becomes Countries and the melted column names
# become Time. Northern_mean is the average of `value` within each Time;
# missing values are ignored (consistent with the other averages computed in
# this script) so a single NA does not blank out the whole period.
# The grouping is removed afterwards so that later verbs applied to this data
# frame are not silently evaluated per Time.
vacancies_data_long <- reshape2::melt(vacancies_data) %>%
  dplyr::rename(Countries = TIME_1, Time = variable) %>%
  group_by(Time) %>%
  mutate(Northern_mean = mean(value, na.rm = TRUE)) %>%
  ungroup()

# One line (with points) per country.
plot_2 <- vacancies_data_long %>%
  ggplot(aes(x = Time, y = value, group = Countries, color = Countries)) +
  geom_line() +
  geom_point() +
  xlab("Time") +
  ylab("Vacancy rate by country") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 1, colour = "gray", linetype = 2)
  ) +
  scale_color_brewer()

ggplotly(plot_2)
library(readr)

## Graph 3 - Unemployment by age group

# Both extracts are read with the same column specification: TIME is parsed as
# a date from a four-digit year, Value as a number.
Unemployment_by_age_17_24 <- read_csv(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/DP_LIVE_23022021141046510.csv",
  col_types = cols(
    TIME = col_date(format = "%Y"),
    Value = col_number()
  )
)
Unemployment_by_age_25_74 <- read_csv(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/DP_LIVE_23022021141106416.csv",
  col_types = cols(
    TIME = col_date(format = "%Y"),
    Value = col_number()
  )
)

# Stack the two extracts into a single table.
Unemployment_by_age <- rbind(
  Unemployment_by_age_17_24,
  Unemployment_by_age_25_74
)

# One subset per LOCATION code. Each keeps TIME, LOCATION, the value
# (as Unemployment) and the subject (as Age_Group), in that column order.
Unemployment_by_age_1 <- Unemployment_by_age %>%
  filter(LOCATION == "SWE") %>%
  select(TIME, LOCATION, Unemployment = Value, Age_Group = SUBJECT)

Unemployment_by_age_2 <- Unemployment_by_age %>%
  filter(LOCATION == "NOR") %>%
  select(TIME, LOCATION, Unemployment = Value, Age_Group = SUBJECT)

Unemployment_by_age_3 <- Unemployment_by_age %>%
  filter(LOCATION == "DNK") %>%
  select(TIME, LOCATION, Unemployment = Value, Age_Group = SUBJECT)

Unemployment_by_age_4 <- Unemployment_by_age %>%
  filter(LOCATION == "OECD") %>%
  select(TIME, LOCATION, Unemployment = Value, Age_Group = SUBJECT)


# Unemployment over time, one series per age group, for the SWE subset.
# Unlike the two charts below, no fixed y range is imposed here.
plot_3 <- ggplot(
  Unemployment_by_age_1,
  aes(x = TIME, y = Unemployment, group = Age_Group, color = Age_Group)
) +
  geom_line() +
  geom_point() +
  labs(x = "Time", y = "Unemployment by age group : Sweden ") +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.line = element_line(size = 1, colour = "gray", linetype = 2),
    panel.background = element_rect(fill = "white")
  )

# Same chart for the NOR subset, y axis fixed to 0-25.
plot_4 <- ggplot(
  Unemployment_by_age_2,
  aes(x = TIME, y = Unemployment, group = Age_Group, color = Age_Group)
) +
  geom_line() +
  geom_point() +
  ylim(0, 25) +
  labs(x = "Time", y = "Unemployment by age group : Norway") +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.line = element_line(size = 1, colour = "gray", linetype = 2),
    panel.background = element_rect(fill = "white")
  )

# Same chart for the DNK subset, y axis fixed to 0-25.
plot_5 <- ggplot(
  Unemployment_by_age_3,
  aes(x = TIME, y = Unemployment, group = Age_Group, color = Age_Group)
) +
  geom_line() +
  geom_point() +
  ylim(0, 25) +
  labs(x = "Time", y = "Unemployment by age group : Denmark") +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.line = element_line(size = 1, colour = "gray", linetype = 2),
    panel.background = element_rect(fill = "white")
  )

# Unemployment over time for the OECD subset, one series per age group,
# restricted to the 1983-2019 window with the y axis fixed to 0-25.
#
# The x limits are built with as.Date() on ISO date strings. The previous
# as.Date.numeric(1983, origin = "1983-01-01") form added 1983 *days* to the
# origin (and likewise 2019 days to 2019-01-01), which shifted the window to
# roughly 1988-2024 instead of 1983-2019.
plot_6 <- Unemployment_by_age_4 %>%
  ggplot(aes(x = TIME, y = Unemployment, group = Age_Group, color = Age_Group)) +
  geom_line() +
  geom_point() +
  ylim(0, 25) +
  xlim(as.Date("1983-01-01"), as.Date("2019-01-01")) +
  xlab("Time") +
  ylab("Unemployment by age group : OECD") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 1, colour = "gray", linetype = 2)
  ) +
  scale_color_brewer(palette = "Paired")

# OECD chart without any restriction on the x axis (y axis fixed to 0-25).
plot_66 <- ggplot(
  Unemployment_by_age_4,
  aes(x = TIME, y = Unemployment, group = Age_Group, color = Age_Group)
) +
  geom_line() +
  geom_point() +
  ylim(0, 25) +
  labs(x = "Time", y = "Unemployment by age group : OECD") +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.line = element_line(size = 1, colour = "gray", linetype = 2),
    panel.background = element_rect(fill = "white")
  )

# Interactive versions of the Sweden, Norway, Denmark and OECD charts.
ggplotly(plot_3)
ggplotly(plot_4)
ggplotly(plot_5)
ggplotly(plot_66)
library(readxl)

## Graph 4 - Dependency ratio

# The first 3 rows of the sheet are skipped before the header.
Dependency <- read_excel(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/Dependency.xls",
  skip = 3
)

# Give the two country columns syntactic names and keep the four entities
# that are compared in the chart.
Dependency_1 <- Dependency %>%
  rename(Country_Name = "Country Name", Country_Code = "Country Code") %>%
  filter(Country_Name %in% c("Sweden", "OECD members", "Denmark", "Norway"))

# Long format: the melted column names become TIME, the values Dependency.
Dependency_long <- Dependency_1 %>%
  reshape2::melt() %>%
  rename(Dependency = value, TIME = variable)

# One line (with points) per country; x tick labels are rotated and shrunk.
plot_Dependency <- ggplot(
  Dependency_long,
  aes(x = TIME, y = Dependency, group = Country_Name, color = Country_Name)
) +
  geom_line() +
  geom_point() +
  labs(
    x = "Time",
    y = "Age dependency ratio (% of working-age population)"
  ) +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.text.x = element_text(angle = 75, size = 7),
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 1, colour = "gray", linetype = 2)
  )

ggplotly(plot_Dependency)
library(readr)

# Graph 5 - Employment by gender

# Men: FREQUENCY is forced to character, TIME and Value are parsed as numbers
# and the `Flag Codes` column is not read.
swe_gender_men <- read_csv(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/swe_gender_men.csv",
  col_types = cols(
    FREQUENCY = col_character(),
    TIME = col_number(),
    Value = col_number(),
    `Flag Codes` = col_skip()
  )
)

# Women: same treatment of TIME, Value and `Flag Codes`; the remaining column
# types are left to readr.
swe_gender_woman <- read_csv(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/swe_gender_woman.csv",
  col_types = cols(
    TIME = col_number(),
    Value = col_number(),
    `Flag Codes` = col_skip()
  )
)

# Stack both files (men first), keep the SWE rows and the four columns used
# downstream: LOCATION, Gender (from SUBJECT), TIME, Employment (from Value).
swe_gender <- rbind(swe_gender_men, swe_gender_woman) %>%
  filter(LOCATION == "SWE") %>%
  select(LOCATION, Gender = SUBJECT, TIME, Employment = Value)


################### Var ################

# Same SWE subset as swe_gender, plus DIFF: each row's Employment minus the
# Employment found 19 rows further down (NA for the last 19 rows).
# NOTE(review): this presumably subtracts the women's value from the men's
# value of the same year, which only holds if the stacked data is 19 men rows
# followed by 19 women rows in the same TIME order - confirm against the files.
swe_gender_long <- rbind(swe_gender_men, swe_gender_woman) %>%
  select(
    LOCATION,
    SUBJECT,
    TIME,
    Value
  ) %>%
  rename(
    Gender = "SUBJECT",
    Employment = "Value"
  ) %>%
  filter(LOCATION == "SWE") %>%
  mutate(DIFF = (Employment - lead(Employment, n = 19)))

# Long table holding only the DIFF series, relabelled as Gender == "DIFF" so it
# can be plotted with the same aesthetics as the per-gender series.
# The melt must start from swe_gender_long: it is the table that carries the
# DIFF column. Melting swe_gender (which has no DIFF) left this result empty.
swe_gender_long_var <- reshape2::melt(
  swe_gender_long,
  id.vars = c("LOCATION", "TIME", "Gender")
) %>%
  mutate(Gender = ifelse(variable == "DIFF", "DIFF", Gender)) %>%
  select(LOCATION, TIME, Gender, value) %>%
  rename(Employment = value) %>%
  filter(Gender == "DIFF") %>%
  na.omit()


#######################################

# Descriptive statistics of Employment for each Gender: row count, mean,
# standard deviation, minimum, maximum and median. Missing values are ignored
# by every statistic except the row count.
# TRUE is spelled out (T is an ordinary variable that can be reassigned) and
# sd() replaces the hand-written square root of the variance.
SummStat_swe_gender <- swe_gender %>%
  group_by(Gender) %>%
  summarize(across(
    .cols = Employment,
    .fns = list(
      n = ~ n(),
      Mean = ~ mean(.x, na.rm = TRUE),
      SD = ~ sd(.x, na.rm = TRUE),
      Min = ~ min(.x, na.rm = TRUE),
      Max = ~ max(.x, na.rm = TRUE),
      Med = ~ median(.x, na.rm = TRUE)
    ),
    .names = "{.col}_{.fn}"
  ))

as_tibble(SummStat_swe_gender)
## # A tibble: 2 x 7
##   Gender Employment_n Employment_Mean Employment_SD Employment_Min
##   <chr>         <int>           <dbl>         <dbl>          <dbl>
## 1 MEN              19            76.0          1.56           73.6
## 2 WOMEN            19            72.4          1.92           69.6
## # … with 2 more variables: Employment_Max <dbl>, Employment_Med <dbl>
# Employment over time, one series per gender.
# The y-axis title now says "by gender": the series are grouped and coloured
# by Gender, and the former "by age group" wording was a copy-paste leftover.
plot_7 <- swe_gender %>%
  ggplot(aes(x = TIME, y = Employment, group = Gender, color = Gender)) +
  geom_line() +
  geom_point() +
  xlab("Time") +
  ylab("Employment by gender : Sweden") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 0.5, colour = "gray")
  ) +
  scale_color_brewer(palette = "Paired")

ggplotly(plot_7)
# Chart of the DIFF series held in swe_gender_long_var (the difference in
# Employment between the two stacked gender series) over time.
# The y-axis title describes that gap instead of the copy-pasted
# "by age group" wording, which did not match the plotted data.
plot_diff <- swe_gender_long_var %>%
  ggplot(aes(x = TIME, y = Employment, group = Gender, color = Gender)) +
  geom_line() +
  geom_point() +
  xlab("Time") +
  ylab("Gender employment gap : Sweden") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 0.5, colour = "gray")
  ) +
  scale_color_brewer(palette = "Paired")
library(readxl)

# Graph 6 - Unemployment by education and gender

# The first 2 rows of the sheet are skipped before the header.
unemployment_by_edu <- read_excel(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/unemployment_by_edu.xlsx",
  skip = 2
)

# Long format with final column names, restricted to the "total" rows for both
# the unemployment type and the gender, and excluding the row that aggregates
# all educational levels.
unemployment_by_edu_long <- unemployment_by_edu %>%
  reshape2::melt() %>%
  rename(TIME = variable, UEMPLOYMENT = value, Type = Unemployment) %>%
  filter(
    Type == "total",
    Gender == "total",
    `Education degree` != "All educational levels"
  )

# One line (with points) per educational degree.
plot_Unemployment_by_education <- ggplot(
  unemployment_by_edu_long,
  aes(
    x = TIME,
    y = UEMPLOYMENT,
    group = `Education degree`,
    color = `Education degree`
  )
) +
  geom_line() +
  geom_point() +
  labs(x = "Time", y = "Unemployment by educational degree : Sweden") +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.line = element_line(size = 0.5, colour = "gray"),
    panel.background = element_rect(fill = "white")
  )

ggplotly(plot_Unemployment_by_education)
library(readxl)

## Summary statistics - G7

# Descriptive statistics of UEMPLOYMENT for each combination of age group and
# educational degree: row count, mean, standard deviation, minimum, maximum
# and median. Missing values are ignored by every statistic except the count.
# TRUE is spelled out (T can be reassigned), sd() replaces the hand-written
# square root of the variance, and .groups = "drop" returns an ungrouped table
# instead of one that silently stays grouped by Agegroup.
SummStat_Unemployment_by_degree_long <- Unemployment_by_degree_long %>%
  group_by(Agegroup, `Educationnal degree`) %>%
  summarize(
    across(
      .cols = UEMPLOYMENT,
      .fns = list(
        n = ~ n(),
        Mean = ~ mean(.x, na.rm = TRUE),
        SD = ~ sd(.x, na.rm = TRUE),
        Min = ~ min(.x, na.rm = TRUE),
        Max = ~ max(.x, na.rm = TRUE),
        Med = ~ median(.x, na.rm = TRUE)
      ),
      .names = "{.col}_{.fn}"
    ),
    .groups = "drop"
  )

as_tibble(SummStat_Unemployment_by_degree_long)
## # A tibble: 6 x 8
##   Agegroup `Educationnal d… UEMPLOYMENT_n UEMPLOYMENT_Mean UEMPLOYMENT_SD
##   <chr>    <chr>                    <int>            <dbl>          <dbl>
## 1 From 15… Less than prima…           162            31.8           8.91 
## 2 From 15… Upper secondary…           149            17.0           6.12 
## 3 From 20… Tertiary educat…            65            12.4           3.11 
## 4 From 20… Less than prima…           164            12.4           4.19 
## 5 From 20… Tertiary educat…           164             4.03          0.650
## 6 From 20… Upper secondary…           164             5.98          1.15 
## # … with 3 more variables: UEMPLOYMENT_Min <dbl>, UEMPLOYMENT_Max <dbl>,
## #   UEMPLOYMENT_Med <dbl>
##############

# UEMPLOYMENT over TIME for the rows whose Agegroup is
# "From 15 to 19 years", one series per educational degree.
plot_Unemployment_by_degree_long_15 <- ggplot(
  filter(Unemployment_by_degree_long, Agegroup == "From 15 to 19 years"),
  aes(
    x = TIME,
    y = UEMPLOYMENT,
    group = `Educationnal degree`,
    color = `Educationnal degree`
  )
) +
  geom_line() +
  geom_point() +
  labs(x = "Time", y = "Unemployment by educational degree : Sweden") +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.line = element_line(size = 0.5, colour = "gray"),
    panel.background = element_rect(fill = "white")
  )

ggplotly(plot_Unemployment_by_degree_long_15)
###############

# UEMPLOYMENT over TIME for the rows whose Agegroup is
# "From 20 to 64 years", one series per educational degree.
plot_Unemployment_by_degree_long_65 <- ggplot(
  filter(Unemployment_by_degree_long, Agegroup == "From 20 to 64 years"),
  aes(
    x = TIME,
    y = UEMPLOYMENT,
    group = `Educationnal degree`,
    color = `Educationnal degree`
  )
) +
  geom_line() +
  geom_point() +
  labs(x = "Time", y = "Unemployment by educational degree : Sweden") +
  scale_color_brewer(palette = "Paired") +
  theme(
    axis.line = element_line(size = 0.5, colour = "gray"),
    panel.background = element_rect(fill = "white")
  )

ggplotly(plot_Unemployment_by_degree_long_65)
################

# Employment by sector; the first 3 rows of the sheet are skipped before the
# header.
Employment_by_sector <- read_excel(
  "/Users/bastienpatras/Desktop/Sciences Po - Master in Economics/Labor Market/Employment_by_sector.xls",
  skip = 3
)

# Long format (the `GEO/TIME` column holds the sector label), then one line
# (with points) per sector; x tick labels are rotated and shrunk.
# The chart is stored under its own name so that it no longer overwrites
# plot_Unemployment_by_education, the education chart built earlier in the
# script.
plot_Employment_by_sector <- reshape2::melt(Employment_by_sector) %>%
  rename(TIME = variable, EMPLOYMENT = value, Sector = `GEO/TIME`) %>%
  ggplot(aes(x = TIME, y = EMPLOYMENT, group = Sector, color = Sector)) +
  geom_line() +
  geom_point() +
  xlab("Time") +
  ylab("Employment by sector over time : Sweden") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 0.5, colour = "gray"),
    axis.text.x = element_text(angle = 75, size = 7)
  ) +
  scale_color_brewer()

ggplotly(plot_Employment_by_sector)