library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Read the wide-format life-expectancy workbooks (one table per sex).
life_expectancy_female_wide <- read_excel(
  "DATA/life_expectancy_female_wide.xlsx"
)
life_expectancy_male_wide <- read_excel(
  "DATA/life_expectancy_male_wide.xlsx"
)

# Reshape every "year_*" column into (year, value) rows; the "year_" prefix
# is stripped from the resulting `year` labels.
life_f_long <- pivot_longer(
  life_expectancy_female_wide,
  cols = starts_with("year_"),
  names_prefix = "year_",
  names_to = "year",
  values_to = "female_life_expectancy"
)
life_m_long <- pivot_longer(
  life_expectancy_male_wide,
  cols = starts_with("year_"),
  names_prefix = "year_",
  names_to = "year",
  values_to = "male_life_expectancy"
)

# Attach the male values to the female rows by country and year, then discard
# the two suffixed population columns produced by the join.
Joined <- life_f_long %>%
  left_join(life_m_long, by = c("country", "year")) %>%
  select(-c(population_2019.x, population_2019.y))

# Number of missing values in each column.
colSums(is.na(Joined))
## country country_code.x year
## 0 120 0
## female_life_expectancy country_code.y male_life_expectancy
## 110 120 110
# Drop every row that contains a missing value in any column.
Joined <- na.omit(Joined)

# Region lookup: keep the 2010 rows of the GDP/happiness table and reduce them
# to a (country, regional_indicator) pair.
gdp_happiness_per_country <- read_excel(
  "DATA/gdp_happiness_per_country.xlsx"
)
country_area <- filter(gdp_happiness_per_country, year == 2010)
country_area_filtered <- country_area %>%
  select(country = country_name, regional_indicator)

# Add the region to each life-expectancy row and keep only rows for which a
# region is available.
final <- Joined %>%
  left_join(country_area_filtered, by = "country") %>%
  filter(!is.na(regional_indicator))
# Scatter plot of male against female life expectancy, coloured by region and
# split into one panel per region.
ggplot(
  final,
  aes(
    x = female_life_expectancy,
    y = male_life_expectancy,
    colour = regional_indicator
  )
) +
  geom_point() +
  facet_wrap(vars(regional_indicator)) +
  labs(title = "Male to Female") +
  theme_light()
# Median female life expectancy per country and year for three selected
# countries, shown as a dodged bar chart.
df <- final %>%
  filter(country %in% c("Israel", "Japan", "Uganda"))

# The NA-removal argument of median() is spelled `na.rm`. A misspelled name
# (e.g. `na.arm`) is silently absorbed by `...` and leaves missing values in
# place. `.groups = "drop"` returns an ungrouped tibble, so no grouped-output
# message is emitted and later verbs are not affected by leftover grouping.
df2 <- df %>%
  group_by(country, year) %>%
  summarise(
    Mf = median(female_life_expectancy, na.rm = TRUE),
    .groups = "drop"
  )

# One bar per country, placed side by side within each year.
ggplot(df2, aes(x = year, y = Mf, fill = country)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_classic() +
  scale_fill_brewer(palette = "Set1")
# Boxplot ----
# Female life expectancy per year (2016 onwards), one panel per region.

# `year` is built from column names by pivot_longer() and is therefore
# character. Compare it as an integer so the cut-off is numeric rather than a
# collation-dependent string comparison.
df3 <- final %>%
  filter(as.integer(year) >= 2016L)

ggplot(df3, aes(x = factor(year), y = female_life_expectancy, fill = year)) +
  geom_boxplot() +
  # Overlay the raw observations. height = 0 restricts the jitter to the
  # horizontal direction so the plotted life-expectancy values stay exact.
  geom_jitter(color = "black", width = 0.1, height = 0, size = 0.1) +
  facet_wrap(~regional_indicator) +
  labs(
    title = "Female Life Expectancy over the Years in different regions"
  ) +
  theme(
    plot.title = element_text(size = 14, color = "blue", face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
```