# Load the life-table files found in the "male/mltper_1x1" directory and stack
# them into one table. Each file is named "<code>.mltper_1x1.txt"; the <code>
# prefix is stored in the "Country" column.
# fread()/rbindlist() are data.table functions; %>% and the verbs are dplyr.
life_table_dir <- "C:\\Users\\Datarist\\OneDrive - VietNam National University - HCM INTERNATIONAL UNIVERSITY\\Study-IU\\FMath1\\male\\mltper_1x1"

# create a list of the files from your target directory
file_list <- list.files(path = life_table_dir)
# Country code = file name without the suffix. fixed = TRUE keeps the dots in
# the suffix literal instead of treating them as regex wildcards.
code <- gsub(".mltper_1x1.txt", "", file_list, fixed = TRUE)
file_list_full <- list.files(path = life_table_dir, full.names = TRUE)

# Read every file and name each table by its code, so that rbindlist() can
# write that name into the "Country" id column.
l <- lapply(file_list_full, fread)
names(l) <- code
f <- rbindlist(l, use.names = TRUE, fill = TRUE, idcol = "Country")

# Optional join to a country-name lookup (disabled in the original):
# country_code <- readxl::read_excel("country_code.xlsx")
# country_code
# final <- f %>% left_join(country_code, by = c("file" = "code"))
final <- f

# Keep the columns used below, add px = 1 - qx, and keep the 2002 rows only.
data <- final %>%
  select(Country, Year, Age, qx, lx, dx) %>%
  mutate(px = 1 - as.numeric(qx)) %>%
  filter(Year == 2002, !is.na(Country))
Year: Year of observing data
Age: Age group
q(x): Probability of death between ages x and x+1
p(x): Probability of surviving from age x to age x+1, i.e. p(x) = 1 - q(x)
l(x): Number of survivors at exact age x, assuming l(0) = 100,000
d(x): Number of deaths between ages x and x+1
# The last age group is coded "110+"; recode it to "110" so that Age can be
# converted to a number. Then convert every column except the first one
# (Country) to numeric and show the resulting structure.
data <- data %>%
  mutate(Age = replace(Age, Age == "110+", "110")) %>%
  mutate(across(-1, as.numeric))
glimpse(data)
## Rows: 5,439
## Columns: 7
## $ Country <chr> "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS",…
## $ Year <dbl> 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 20…
## $ Age <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ qx <dbl> 0.00529, 0.00047, 0.00034, 0.00031, 0.00015, 0.00014, 0.00011,…
## $ lx <dbl> 100000, 99471, 99424, 99391, 99360, 99345, 99332, 99320, 99303…
## $ dx <dbl> 529, 47, 34, 30, 15, 14, 11, 17, 10, 16, 19, 7, 11, 19, 25, 26…
## $ px <dbl> 0.99471, 0.99953, 0.99966, 0.99969, 0.99985, 0.99986, 0.99989,…
# One-year death probability qx for Country "USA" at Age 40.
data %>%
  filter(Country == "USA", Age == 40) %>%
  select(qx)
## qx
## 1: 0.00268
# Log of qx against Age, one coloured line per country, shown as an
# interactive plotly widget.
# group = Country: with group = 1, geom_line() treats the rows of all
# countries as a single series and joins them into one zigzag path.
t <- data %>%
  ggplot(aes(x = Age, y = log(qx), col = Country, group = Country)) +
  geom_line()
ggplotly(t, height = 500, width = 800)
Given a baby born in 2002, the probability that he or she lives beyond a certain age is the cumulative product of the one-year survival probabilities p(x) from the current age up to that age.
For example, compute the survival probability over the first 5 years of a male baby born in 2002 in the USA.
# Survival probability over the first 5 years of life: the product of px over
# ages 0..4 (five one-year steps).
# The Country column holds codes such as "USA"; "U.S.A." matches no row, and
# prod() of an empty column is 1.
data %>%
  filter(Country == "USA", Age %in% 0:4) %>%
  summarize(sp = prod(px))
## NOTE: the previously recorded output (sp = 1) was that empty product;
## re-run this chunk to refresh the value.
Compute the survival probability over the next 50 years of a 20-year-old male born in 2002 in the USA.
# Survival probability over the next 50 years from age 20: the product of px
# over ages 20..69 (fifty one-year steps).
# The Country column holds codes such as "USA"; "U.S.A." matches no row, and
# prod() of an empty column is 1.
data %>%
  filter(Country == "USA", Age %in% 20:69) %>%
  summarize(sp = prod(px))
## NOTE: the previously recorded output (sp = 1) was that empty product;
## re-run this chunk to refresh the value.
# Cumulative survival from birth: within each country, cpx is the running
# product of px over increasing Age. Plotted as one line per country.
t <- data %>%
  group_by(Country) %>%
  # cumprod() depends on row order, so order by Age inside each country.
  arrange(Age, .by_group = TRUE) %>%
  mutate(cpx = cumprod(px)) %>%
  ungroup() %>%
  # group = Country: group = 1 would join all countries into a single path.
  ggplot(aes(x = Age, y = cpx, col = Country, group = Country)) +
  geom_line()
ggplotly(t, height = 500, width = 800)
Given a person born in 2002 and still alive in 2022, the probability that he or she lives beyond a certain age is the cumulative product of the one-year survival probabilities p(x) from age 20 up to that age.
# Cumulative survival conditional on being alive at age 20: within each
# country, cpx is the running product of px starting at Age 20.
# Age >= 20 (not Age > 20) so that the first factor is p(20), matching
# "from age 20" in the accompanying text.
t <- data %>%
  filter(Age >= 20) %>%
  group_by(Country) %>%
  # cumprod() depends on row order, so order by Age inside each country.
  arrange(Age, .by_group = TRUE) %>%
  mutate(cpx = cumprod(px)) %>%
  ungroup() %>%
  # group = Country: group = 1 would join all countries into a single path.
  ggplot(aes(x = Age, y = cpx, col = Country, group = Country)) +
  geom_line()
ggplotly(t, height = 500, width = 800)