Load packages:

library(dplyr)
library(ggplot2)
library(gridExtra)
library(reshape2)

Part 1: Data

Data sources:

- Productivity: https://stats.oecd.org/Index.aspx?DataSetCode=PDB_LV
- Average wages: https://data.oecd.org/earnwage/average-wages.htm
- Minimum wages: https://ec.europa.eu/eurostat/databrowser/view/earn_mw_cur/settings_1/table?lang=en

Load data:

# Read the four input tables; the paths are relative to the working directory.
continents     <- read.csv(file = "continents2.csv")
Average.wages  <- read.csv(file = "Average wages.csv")
Produttività   <- read.csv(file = "Produttività.csv")
salario_minimo <- read.csv(file = "salario_minimo.csv")

Correlation of labor productivity with average wage

# Give the continents table the key names used by the other tables:
# column 1 becomes the country name, column 3 the join key "LOCATION".
names(continents)[c(1, 3)] <- c("Stato", "LOCATION")

# Attach columns 1, 3, 6 and 7 of continents to the average-wage table.
df <- merge(Average.wages, continents[, c(1, 3, 6, 7)], by = "LOCATION")

# Productivity: keep only GDP per hour worked in current-PPP USD, with one
# row per country and year.
df1 <- Produttività %>%
  filter(
    Subject == "GDP per hour worked" &
      Measure == "USD, current prices, current PPPs"
  ) %>%
  select(Stato = Country, TIME, Productivity = Value)

# Average wage per country and year.
df2 <- df %>%
  select(Stato, TIME, salario_medio = Value)

# Pair both measures by country and year, then print their correlation.
df3 <- merge(df1, df2, by = c("Stato", "TIME"))
cor(df3$Productivity, df3$salario_medio)
## [1] 0.7523821
# Scatter plot of the 2021 observations: productivity on x, average wage on y,
# one labelled point per country plus a fitted linear regression line.
df3 %>%
  filter(TIME == 2021) %>%
  ggplot(aes(x = Productivity, y = salario_medio)) +
  geom_point() +
  geom_text(aes(label = Stato), hjust = -0.2, size = 3) +
  stat_smooth(method = "lm") +
  labs(
    title = "Productivity per Average wage in 2021",
    subtitle = "con relativa retta di regressione",
    x = "Productivity",
    y = "Average wage"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Side-by-side time series of productivity (left) and average wage (right)
# for a fixed set of countries, one coloured line per country.
selected_countries <- c("United States", "Germany", "France", "Spain", "Italy")

# The titles announce "from 1990", so 1990 itself must be kept (>=, not >),
# and both panels get the same lower bound on the year.
g1 <- df1 %>%
  filter(Stato %in% selected_countries, TIME >= 1990) %>%
  ggplot(aes(TIME, Productivity, colour = Stato)) +
  geom_line(linewidth = 1) +
  xlab("Anno") +
  ggtitle("Produttività from 1990 to 2022")

g2 <- df2 %>%
  filter(Stato %in% selected_countries, TIME >= 1990) %>%
  ggplot(aes(TIME, salario_medio, colour = Stato)) +
  geom_line(linewidth = 1) +
  xlab("Anno") +
  ggtitle("Average wage from 1990 to 2022")

# Draw both panels in a single row.
grid.arrange(g1, g2, nrow = 1, ncol = 2)

# For each country, draw productivity (blue, left) next to average wage
# (red, right). g1 and g2 are overwritten on every iteration.
for (country in c("United States", "Germany", "France", "Spain", "Italy")) {
  # Keep 1990 itself so the data matches the "from 1990" subtitle, and apply
  # the same lower bound to both panels.
  g1 <- df1 %>%
    filter(Stato == country, TIME >= 1990) %>%
    ggplot(aes(TIME, Productivity)) +
    geom_line(colour = "blue") +
    xlab("Anno") +
    # paste() already inserts a space, so the literal has no trailing blank
    # (the original rendered "in  <country>" with two spaces).
    ggtitle(paste("Produttività in", country), subtitle = "from 1990 to 2022")

  g2 <- df2 %>%
    filter(Stato == country, TIME >= 1990) %>%
    ggplot(aes(TIME, salario_medio)) +
    geom_line(colour = "red") +
    xlab("Anno") +
    ggtitle(paste("Average wage in", country), subtitle = "from 1990 to 2022")

  # grid.arrange() draws immediately, so no explicit print() is needed here.
  grid.arrange(g1, g2, nrow = 1, ncol = 2)
}

Correlation of minimum wage with average wage

# Build a long table of minimum wages (one row per country and year), join it
# to the average wages and print the correlation between the two.

# Keep the first column (named TIME in the raw file, renamed to "Stato" below
# and used as the country key) plus one column per year.
# Note: 2018-2022 take the S2 column, 2023 the S1 column.
df4 <- salario_minimo %>%
  select(TIME, X2018.S2, X2019.S2, X2020.S2, X2021.S2, X2022.S2, X2023.S1)

colnames(df4)[1] <- "Stato"
colnames(df4)[2:7] <- 2018:2023

# Wide -> long. Argument names are spelled out instead of relying on partial
# matching of `id`.
df4 <- melt(
  df4,
  id.vars = "Stato",
  variable.name = "TIME",
  value.name = "salario_minimo"
)

# melt() returns the former column names as a factor. Convert them to integer
# years so the join key has the same type on both sides; merging the factor
# against the other table's year values raised "invalid factor level, NA
# generated".
df4$TIME <- as.integer(as.character(df4$TIME))

df5 <- merge(df4, df2, by = c("Stato", "TIME"))

# Entries that are not parseable as numbers become NA (as.numeric() warns).
df5$salario_minimo <- as.numeric(as.character(df5$salario_minimo))
## Warning: NA introdotti per coercizione

# Logical indexing instead of -which(): with no NA present, -which() yields an
# empty index and would drop every row.
df5 <- df5[!is.na(df5$salario_minimo), ]
cor(df5$salario_minimo, df5$salario_medio)
## [1] 0.8382337
# Scatter plot of the 2021 observations: minimum wage on x, average wage on y,
# one labelled point per country plus a fitted linear regression line.
df5 %>%
  filter(TIME == 2021) %>%
  ggplot(aes(x = salario_minimo, y = salario_medio)) +
  geom_point() +
  geom_text(aes(label = Stato), hjust = -0.2, size = 3) +
  stat_smooth(method = "lm") +
  labs(
    title = "Minimum wage per Average wage in 2021",
    subtitle = "con relativa retta di regressione",
    x = "Minimum wage",
    y = "Average wage"
  )
## `geom_smooth()` using formula = 'y ~ x'