ISTAT DATA - LIFE EXPECTANCY ITALY 2010-2019 BY AGE CATEGORY

Download the dataset and read it. Source: http://dati.istat.it/Index.aspx?DataSetCode=DCIS_MORTALITA1&Lang=en#

require(readr)

# Load the ISTAT export from the working directory into a data frame.
IT_life_exp <- read.csv(file = "bio_regions.csv")

# Preview the first rows to confirm how the columns were parsed.
head(x = IT_life_exp)
##   ITTER107 Territory TIPO_DATO15 Biometric.functions SEXISTAT1  Gender   ETA1
## 1     ITC4 Lombardia   SURVIVORS      survivors - lx         2 females Y15-19
## 2     ITC4 Lombardia   SURVIVORS      survivors - lx         2 females Y15-19
## 3     ITC4 Lombardia   SURVIVORS      survivors - lx         9   total Y15-19
## 4     ITC4 Lombardia   SURVIVORS      survivors - lx         9   total Y15-19
## 5     ITC4 Lombardia      DEATHS         deaths - dx         2 females Y15-19
## 6     ITC4 Lombardia      DEATHS         deaths - dx         2 females Y15-19
##   Age.and.age.class TIME Select.time Value Flag.Codes Flags
## 1       15-19 years 2010        2010 99651         NA    NA
## 2       15-19 years 2019        2019 99652         NA    NA
## 3       15-19 years 2010        2010 99603         NA    NA
## 4       15-19 years 2019        2019 99610         NA    NA
## 5       15-19 years 2010        2010    75         NA    NA
## 6       15-19 years 2019        2019    49         NA    NA

Select the columns of interest, filter the observations to analyse, and build a new dataset to be paired:

# dplyr supplies the pipe and the data-manipulation verbs used below;
# library() stops immediately if the package is not installed.
library(dplyr)

# Keep only the descriptive columns and the measured value.
my_data <- IT_life_exp %>%
  dplyr::select(
    Territory,
    TIPO_DATO15,
    Biometric.functions,
    Gender,
    Age.and.age.class,
    TIME,
    Value
  )

# Life-expectancy rows where Gender is "total", with `Value` exposed under
# the name `lifeExp`.
my_data_le <- my_data %>%
  dplyr::filter(
    Gender == "total",
    Biometric.functions == "life expectancy - ex"
  ) %>%
  dplyr::select(Territory, TIME, lifeExp = Value)

head(my_data_le)
##                            Territory TIME lifeExp
## 1                          Lombardia 2010  67.317
## 2                          Lombardia 2019  68.987
## 3 Provincia Autonoma Bolzano / Bozen 2010  68.057
## 4 Provincia Autonoma Bolzano / Bozen 2019  69.273
## 5          Provincia Autonoma Trento 2010  67.784
## 6          Provincia Autonoma Trento 2019  69.645

Check the observations as grouped frequency counts:

# Frequency tables per biometric function and per territory. count() is
# called through the plyr namespace instead of attaching plyr: attaching it
# after dplyr would mask dplyr verbs such as mutate() and summarise().
plyr::count(my_data$Biometric.functions)
##                                          x freq
## 1                              deaths - dx 3024
## 2                     life expectancy - ex 3024
## 3 probability of death (per thousand) - qx 3024
## 4              projection probability - Px 3024
## 5                           survivors - lx 3024
## 6                         years lived - Lx 3024
plyr::count(my_data$Territory)
##                                     x freq
## 1                             Abruzzo  864
## 2                          Basilicata  864
## 3                            Calabria  864
## 4                            Campania  864
## 5                      Emilia-Romagna  864
## 6               Friuli-Venezia Giulia  864
## 7                               Lazio  864
## 8                             Liguria  864
## 9                           Lombardia  864
## 10                             Marche  864
## 11                             Molise  864
## 12                           Piemonte  864
## 13 Provincia Autonoma Bolzano / Bozen  864
## 14          Provincia Autonoma Trento  864
## 15                             Puglia  864
## 16                           Sardegna  864
## 17                            Sicilia  864
## 18                            Toscana  864
## 19                             Umbria  864
## 20     Valle d'Aosta / Vallée d'Aoste  864
## 21                             Veneto  864

Check and set the structure of key elements as needed:

# Preview and inspect the filtered table before renaming its columns.
head(my_data_le)
##                            Territory TIME lifeExp
## 1                          Lombardia 2010  67.317
## 2                          Lombardia 2019  68.987
## 3 Provincia Autonoma Bolzano / Bozen 2010  68.057
## 4 Provincia Autonoma Bolzano / Bozen 2019  69.273
## 5          Provincia Autonoma Trento 2010  67.784
## 6          Provincia Autonoma Trento 2019  69.645
str(my_data_le)
## 'data.frame':    1008 obs. of  3 variables:
##  $ Territory: Factor w/ 21 levels "Abruzzo","Basilicata",..: 9 9 13 13 14 14 21 21 3 3 ...
##  $ TIME     : int  2010 2019 2010 2019 2010 2019 2010 2019 2010 2019 ...
##  $ lifeExp  : num  67.3 69 68.1 69.3 67.8 ...
# NOTE: a conversion of the territory column to character was left disabled
# here. It addressed the column by its lower-case name, which only exists
# after the renaming below.
# Lower-case every column name.
colnames(my_data_le) <- tolower(colnames(my_data_le))

Pair the dataset by the time factor:

# Give each consecutive pair of rows a shared id (1, 1, 2, 2, ...) and turn
# the year into a factor. The id is derived from the row position, so it also
# works for an empty table or an odd number of rows, where
# rep(1:(n() / 2), each = 2) would produce a vector of the wrong length.
# NOTE(review): assumes each pair of consecutive rows holds the two
# observations to connect (as in the preview of my_data_le) - confirm the
# input ordering.
my_data_paired <- my_data_le %>%
  dplyr::mutate(
    paired = (dplyr::row_number() + 1L) %/% 2L,
    time = factor(time)
  )

Plot the life expectancy values across the age categories between 2010 and 2019:

# ggplot2 is required for the chart; library() fails fast if it is missing.
library(ggplot2)

# Dumbbell-style chart: one grey segment per pair id joining its points,
# which are coloured by `time`. Regions are ordered on the y axis by
# reorder(), i.e. by their mean life expectancy. No group_by() is needed
# because geom_line() receives its grouping through aes(group = paired).
ggplot(my_data_paired, aes(x = lifeexp, y = reorder(territory, lifeexp))) +
  geom_line(aes(group = paired), color = "grey") +
  geom_point(aes(color = time), size = 2) +
  labs(x = "Life Expectancy", y = "Region")