library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)

# ---- Load the wide-format life expectancy tables (one workbook per sex) ----
life_expectancy_female_wide <- read_excel(
  "DATA/life_expectancy_female_wide.xlsx"
)
life_expectancy_male_wide <- read_excel(
  "DATA/life_expectancy_male_wide.xlsx"
)

# ---- Reshape wide -> long ----
# Every `year_*` column becomes one row. The "year_" prefix is stripped from
# the former column name and the remainder is stored in `year`.
life_f_long <- pivot_longer(
  life_expectancy_female_wide,
  cols = starts_with("year_"),
  names_prefix = "year_",
  names_to = "year",
  values_to = "female_life_expectancy"
)

life_m_long <- pivot_longer(
  life_expectancy_male_wide,
  cols = starts_with("year_"),
  names_prefix = "year_",
  names_to = "year",
  values_to = "male_life_expectancy"
)

# ---- Combine female and male values per country and year ----
# Columns present in both tables but not used as join keys come back with
# .x / .y suffixes; the two population_2019 copies are dropped right away.
Joined <- life_f_long %>%
  left_join(life_m_long, by = c("country", "year")) %>%
  select(-population_2019.x, -population_2019.y)

# Number of missing values in each column.
Joined %>%
  is.na() %>%
  colSums()
##                country         country_code.x                   year 
##                      0                    120                      0 
## female_life_expectancy         country_code.y   male_life_expectancy 
##                    110                    120                    110

# Keep only rows that have no missing value in any column.
Joined <- na.omit(Joined)

# ---- Attach a region to every country ----
gdp_happiness_per_country <- read_excel("DATA/gdp_happiness_per_country.xlsx")

# The rows for 2010 serve as the country -> region lookup.
country_area <- filter(gdp_happiness_per_country, year == 2010)

# Keep the two lookup columns, renaming `country_name` to `country` so that
# it matches the join key used in `Joined`.
country_area_filtered <- select(
  country_area,
  country = country_name,
  regional_indicator
)

# Add the region and discard countries for which none was found.
final <- Joined %>%
  left_join(country_area_filtered, by = "country") %>%
  filter(!is.na(regional_indicator))

# Scatter plot of male vs. female life expectancy, one panel per region.
ggplot(
  final,
  aes(
    x = female_life_expectancy,
    y = male_life_expectancy,
    colour = regional_indicator
  )
) +
  geom_point() +
  theme_light() +
  labs(title = "Male to Female") +
  facet_wrap(vars(regional_indicator))

# ---- Median female life expectancy for three selected countries ----
df <- final %>%
  filter(country %in% c("Israel", "Japan", "Uganda"))

# One median per country and year.
# `na.rm = TRUE` makes median() ignore missing values (the argument was
# previously misspelled and therefore silently ignored).
# `.groups = "drop"` returns an ungrouped tibble and silences the
# "grouped output" message from summarise().
df2 <- df %>%
  group_by(country, year) %>%
  summarise(
    Mf = median(female_life_expectancy, na.rm = TRUE),
    .groups = "drop"
  )

# Dodged bar chart: one bar per country within each year.
# geom_col() plots the values as given (same as geom_bar(stat = "identity")).
ggplot(df2, aes(x = year, y = Mf, fill = country)) +
  geom_col(position = "dodge") +
  theme_classic() +
  scale_fill_brewer(palette = "Set1")

# ---- Boxplot: female life expectancy per year within each region ----

# Note: adding `axis.text.x = element_blank()` to theme() removes the x-axis
# tick labels.

# Compare the year numerically rather than relying on implicit coercion
# between a text column and a number.
df3 <- final %>%
  filter(as.integer(year) >= 2016)

# One box per year, one panel per region; the individual observations are
# drawn on top of the boxes as small jittered black points.
ggplot(df3, aes(x = factor(year), y = female_life_expectancy, fill = year)) +
  geom_boxplot() +
  geom_jitter(color = "black", width = 0.1, size = 0.1) +
  facet_wrap(~regional_indicator) +
  labs(title = "Female Life Expectancy over the Years in different regions") +
  theme(
    plot.title = element_text(size = 14, color = "blue", face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

```