viewing the data

So we’re having population and death data of lung cancer from each age group by ROC era per observation year and district.

library(dplyr);library(tidyr);library(readxl)
data <- readxl::read_excel("C:\\Users\\gilli\\Downloads\\m155.xls")
head(data)
## # A tibble: 6 × 5
##   agegp     pop  year address death
##   <chr>   <dbl> <dbl>   <dbl> <dbl>
## 1 Y00_04 109317    61       1     1
## 2 Y05_09 116574    61       1    NA
## 3 Y10_14 114398    61       1    NA
## 4 Y15_19 102715    61       1    NA
## 5 Y20_24  85540    61       1     2
## 6 Y25_29  63738    61       1     1

Then we replace the NA death with 0, and group every 5 observation year as a new column called yeargp.

data[is.na(data)] <- 0
data$age <-as.numeric( substr(data$agegp,start = 2,stop =3))

data$yeargp[data$year <= 65] <- 'æ°‘61-65å¹´'
data$yeargp[data$year > 65 & data$year <= 70] <- 'æ°‘66-70å¹´'
data$yeargp[data$year > 70 & data$year <= 75] <- 'æ°‘71-75å¹´'
data$yeargp[data$year > 75 & data$year <= 80] <- 'æ°‘76-80å¹´'
data$yeargp[data$year > 80 & data$year <= 85] <- 'æ°‘81-85å¹´'
data$yeargp[data$year > 85 & data$year <= 90] <- 'æ°‘86-90å¹´'

Since we’re comparing the mortality from different age group, let’s break the data into the elders and the youngs.

elder <- data %>% filter(age>=50)%>% arrange(yeargp, agegp)
young <- data %>% filter(age<= 14)%>% arrange(yeargp, agegp)

We’ll start to calculate the mortality of every 5 year, so we have to sum the death and population during the time interval over different address. After that we can calculate the mortality rate.

elder_cu_age_specific <- elder %>%
        group_by(yeargp, agegp) %>%
        summarise(sum_death = sum(death),
                  sum_pop = sum(pop)) %>%
        mutate(rate = sum_death/sum_pop, rate10 = rate*100000)

young_cu_age_specific <- young %>%
        group_by(yeargp, agegp) %>%
        summarise(sum_death = sum(death),
                  sum_pop = sum(pop)) %>%
        mutate(rate = sum_death/sum_pop, rate10 = rate*100000)
elder_long <- elder_cu_age_specific %>%
        mutate(yeargp = factor(yeargp), agegp = factor(agegp)) %>%
        select(agegp, yeargp, rate10)
elder_apc <- pivot_wider(data = elder_long,names_from = yeargp, values_from = rate10)

young_long <- young_cu_age_specific %>%
        mutate(yeargp = factor(yeargp), agegp = factor(agegp)) %>%
        select(agegp, yeargp, rate10)
young_apc <- pivot_wider(data = young_long,names_from = yeargp, values_from = rate10)

So let’s see the aggregated data.

print(elder_apc)
## # A tibble: 8 × 7
##   agegp  `æ°‘61-65å¹´` `æ°‘66-70å¹´` `æ°‘71-75å¹´` `æ°‘76-80å¹´` `æ°‘81-85å¹´` `æ°‘86-90å¹´`
##   <fct>        <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
## 1 Y50_54        60.7        68.4        72.7        75.4        83.9        81.5
## 2 Y55_59        84.2        85.8        97.6       100.        123.        121. 
## 3 Y60_64        98.6       106.        117.        120.        153.        164. 
## 4 Y65_69       118.        132.        142.        137.        170.        187. 
## 5 Y70_74       126.        130.        154.        172.        204.        199. 
## 6 Y75_79       122.        155.        184.        189.        238.        239. 
## 7 Y80_84       137.        147.        161.        171.        252.        261. 
## 8 Y85ABO       104.        135.        236.        210.        246.        254.
print(young_apc)
## # A tibble: 3 × 7
##   agegp  `æ°‘61-65å¹´` `æ°‘66-70å¹´` `æ°‘71-75å¹´` `æ°‘76-80å¹´` `æ°‘81-85å¹´` `æ°‘86-90å¹´`
##   <fct>        <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
## 1 Y00_04       0.578       0.504       0.623       0.517       0.359       0.201
## 2 Y05_09       0.621       0.904       1.05        0.756       0.280       0.142
## 3 Y10_14       0.721       1.04        1.12        0.800       0.659       0.352

You can stop here and do it on hand for drawing apc in excel.

write.csv(elder_apc, "C:\\Users\\gilli\\Downloads\\New folder\\elder_apc.csv" )
write.csv(young_apc, "C:\\Users\\gilli\\Downloads\\New folder\\young_apc.csv")

Period effect

library(ggplot2)
par(mfrow = c(1,2))
ggplot(data = elder_long, aes(x = yeargp, y = rate10, col = agegp, group = agegp)) + 
        geom_point() + geom_line() + theme_bw()

ggplot(data = young_long, aes(x = yeargp, y = rate10, col = agegp, group = agegp)) + 
        geom_point() + geom_line() + theme_bw()

Age effect

par(mfrow = c(1,2))
ggplot(data = elder_long, aes(x = agegp, y = rate10, col = yeargp, group = yeargp)) + 
        geom_point() + geom_line() + theme_bw()

ggplot(data = young_long, aes(x = agegp, y = rate10, col = yeargp, group = yeargp)) + 
        geom_point() + geom_line() + theme_bw()

Cohort effect

First, let’s look at the group of people born earlier. Each line represents a cohort. We can see that the overall trend is moving from the bottom left to the top right, indicating that as these cohorts ages, the mortality rate increases. As we zoom in, we can see that the curves are shifting slightly vertically. This suggests that in these decades, there is a trend of increasing mortality rates

elder_apc %>% 
        ungroup() %>% 
        mutate(row = row_number()) %>% 
        gather(year, mortality, `æ°‘61-65å¹´`:`æ°‘86-90å¹´`) %>%
        arrange(row, desc(year)) %>%
        group_by(row) %>%
        mutate(time_left = row_number()) %>%
        transmute(agegp, year, mortality, start_year = time_left + row - 1)%>%
        ggplot(aes(agegp, mortality, color = factor(start_year), group = factor(start_year))) +
        geom_line() + ggtitle("cohort effect of the >50 population") + theme_bw()

Now let’s look at the younger generation, those who have been vaccinated. First, let’s look at the y-axis. The mortality rate per hundred thousand population is much lower compared to the previous axis. This is because they are younger. We cannot see the overall trend, however, for the slope of each cohort, the trend shifts from positive to negative. This means that the mortality rate decreases with each generation.

young_apc %>% 
        ungroup() %>% 
        mutate(row = row_number()) %>% 
        gather(year, mortality, `æ°‘61-65å¹´`:`æ°‘86-90å¹´`) %>%
        arrange(row, desc(year)) %>%
        group_by(row) %>%
        mutate(time_left = row_number()) %>%
        transmute(agegp, year, mortality, start_year = time_left + row - 1)%>%
        ggplot(aes(agegp, mortality, color = factor(start_year), group = factor(start_year))) +
        geom_line() + ggtitle("cohort effect of the <14 population") + theme_bw()

We can attribute the slope change on mortality on the intervention of HBV vaccine in Taiwan, since we’re only observing the cohort with lower age, which means of less exposure to develop kung cancer, and experiencing the HBV vaccine policy.