library(dplyr)
library(ggplot2)
library(gridExtra)
library(reshape2)
# Data sources:
# - Productivity: https://stats.oecd.org/Index.aspx?DataSetCode=PDB_LV
# - Average wages: https://data.oecd.org/earnwage/average-wages.htm
# - Minimum wages: https://ec.europa.eu/eurostat/databrowser/view/earn_mw_cur/settings_1/table?lang=en
# Load data:
# Read the four raw CSV inputs from the working directory.
salario_minimo <- read.csv("salario_minimo.csv")
Produttività <- read.csv("Produttività .csv")
Average.wages <- read.csv("Average wages.csv")
continents <- read.csv("continents2.csv")

# Rename the lookup columns so they line up with the other tables:
# column 1 becomes the country name ("Stato") and column 3 becomes the join
# key "LOCATION" (presumably a country code matching Average.wages$LOCATION --
# confirm against the CSV header).
names(continents)[c(1, 3)] <- c("Stato", "LOCATION")

# Attach the country name (plus lookup columns 6 and 7) to every average-wage
# row. merge() performs an inner join, so wage rows whose LOCATION has no
# match in the lookup table are dropped.
df <- merge(
  Average.wages,
  continents[c(1, 3, 6, 7)],
  by = "LOCATION"
)
# Productivity table: keep only the "GDP per hour worked" series expressed in
# current-price, current-PPP US dollars, and rename the columns while
# selecting them.
df1 <- select(
  filter(
    Produttività,
    Subject == "GDP per hour worked",
    Measure == "USD, current prices, current PPPs"
  ),
  Stato = Country,
  TIME,
  Productivity = Value
)

# Average-wage table: one row per country and year.
df2 <- select(df, Stato, TIME, salario_medio = Value)

# Inner join on country and year, then the Pearson correlation between
# productivity and average wage across all matched country-years.
df3 <- merge(df1, df2, by = c("Stato", "TIME"))
with(df3, cor(Productivity, salario_medio))
## [1] 0.7523821
# Scatter plot of average wage against productivity for 2021 only, with each
# point labelled by country and a fitted linear regression line on top.
ggplot(
  data = filter(df3, TIME == 2021),
  mapping = aes(x = Productivity, y = salario_medio)
) +
  geom_point() +
  # A negative hjust pushes the label to the right of its point.
  geom_text(aes(label = Stato), hjust = -0.2, size = 3) +
  stat_smooth(method = "lm") +
  labs(
    x = "Productivity",
    y = "Average wage",
    title = "Productivity per Average wage in 2021",
    subtitle = "con relativa retta di regressione"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Side-by-side time series of productivity (left) and average wage (right)
# for a fixed set of five countries, one coloured line per country.
focus_countries <- c("United States", "Germany", "France", "Spain", "Italy")

# Both titles say "from 1990", so the lower bound is inclusive (the previous
# `TIME > 1990` silently dropped 1990) and is applied to both panels so they
# cover the same years.
g1 <- df1 %>%
  filter(Stato %in% focus_countries, TIME >= 1990) %>%
  ggplot(aes(TIME, Productivity, colour = Stato)) +
  geom_line(linewidth = 1) +
  xlab("Anno") +
  ggtitle("Produttività from 1990 to 2022")

g2 <- df2 %>%
  filter(Stato %in% focus_countries, TIME >= 1990) %>%
  ggplot(aes(TIME, salario_medio, colour = Stato)) +
  geom_line(linewidth = 1) +
  xlab("Anno") +
  ggtitle("Average wage from 1990 to 2022")

# Draw the two panels in a single row.
grid.arrange(g1, g2, nrow = 1, ncol = 2)
# For each of the five focus countries, draw one page with two panels:
# productivity over time (blue, left) and average wage over time (red, right).
for (country in c("United States", "Germany", "France", "Spain", "Italy")) {
  # The subtitles say "from 1990", so the lower bound is inclusive and is
  # applied to both panels so they span the same years.
  g1 <- df1 %>%
    filter(Stato == country, TIME >= 1990) %>%
    ggplot(aes(TIME, Productivity)) +
    geom_line(colour = "blue") +
    xlab("Anno") +
    # paste() already inserts a separating space; a trailing space inside the
    # literal would produce a double space in the title.
    ggtitle(paste("Produttività in", country), subtitle = "from 1990 to 2022")

  g2 <- df2 %>%
    filter(Stato == country, TIME >= 1990) %>%
    ggplot(aes(TIME, salario_medio)) +
    geom_line(colour = "red") +
    xlab("Anno") +
    ggtitle(paste("Average wage in", country), subtitle = "from 1990 to 2022")

  # grid.arrange() draws immediately, so the figure is emitted on every
  # iteration even though nothing is auto-printed inside a for loop.
  grid.arrange(g1, g2, nrow = 1, ncol = 2)
}
# Minimum-wage table: keep the country column (named TIME in the raw file)
# and one half-year column per year from 2018 to 2023.
# NOTE(review): 2023 uses the first-half column (S1) while every other year
# uses the second half (S2) -- presumably S2 was not yet published; confirm.
df4 <- salario_minimo %>%
  select(TIME, X2018.S2, X2019.S2, X2020.S2, X2021.S2, X2022.S2, X2023.S1)
colnames(df4)[1] <- "Stato"
colnames(df4)[2:7] <- 2018:2023

# Reshape wide -> long: one row per country and year.
df4 <- melt(
  df4,
  id.vars = "Stato",
  variable.name = "TIME",
  value.name = "salario_minimo"
)

# melt() returns the former column names as a factor. Convert them to integer
# years so the join key has the same type as df2$TIME; merging a factor
# against an integer column triggers "invalid factor level, NA generated".
df4$TIME <- as.integer(as.character(df4$TIME))

df5 <- merge(df4, df2, by = c("Stato", "TIME"))

# Entries that are not plain numbers become NA here and R emits a coercion
# warning (presumably missing-value placeholders in the raw file -- confirm).
# as.character() guards against the column having been read as a factor.
df5$salario_minimo <- as.numeric(as.character(df5$salario_minimo))

# Drop rows without a usable minimum wage. A logical mask is used instead of
# `-which(...)`, which would select zero rows when there is nothing to drop.
df5 <- df5[!is.na(df5$salario_minimo), ]

# Pearson correlation between minimum and average wage across country-years.
cor(df5$salario_minimo, df5$salario_medio)
## [1] 0.8382337
# Scatter plot of average wage against minimum wage for 2021 only, with each
# point labelled by country and a fitted linear regression line on top.
ggplot(
  data = filter(df5, TIME == 2021),
  mapping = aes(x = salario_minimo, y = salario_medio)
) +
  geom_point() +
  # A negative hjust pushes the label to the right of its point.
  geom_text(aes(label = Stato), hjust = -0.2, size = 3) +
  stat_smooth(method = "lm") +
  labs(
    x = "Minimum wage",
    y = "Average wage",
    title = "Minimum wage per Average wage in 2021",
    subtitle = "con relativa retta di regressione"
  )
## `geom_smooth()` using formula = 'y ~ x'