Age, Cohort, and Period Effects using Cancer Mortality data in Japan

References

Data Preparation

This analysis uses the file cancer_mortality(1958-2011).xls, published by the Center for Cancer Control and Information Services, National Cancer Center, Japan.

## Load XLConnect
library(XLConnect)

## Read the rate data from a newly created workbook that contains only
## sheet 4 (rate data) of the source file, so it is sheet 1 here
rate.all <- readWorksheetFromFile("./cancer_mortality(1958-2011)sheet4.xls", sheet = 1)

## Change variable names
## Only a leading "X" is replaced, so an "X" elsewhere in a column name
## is left untouched
names(rate.all) <- sub("^X", "age", names(rate.all))
## Drop the age suffix "歳" (years old)
names(rate.all) <- gsub("歳", "", names(rate.all))
## "以上" (and over) marks the open-ended top age group
names(rate.all) <- gsub("以上", "plus", names(rate.all))
## "死亡年" (year of death) becomes the calendar-year variable
names(rate.all) <- gsub("死亡年", "Cal_yr", names(rate.all))
## Use underscores instead of dots as separators
names(rate.all) <- gsub("\\.", "_", names(rate.all))

## Show data
head(rate.all)
  コード   部位   ICD ICDコード   性別 Cal_yr   粗率 age0_4 age5_9 age10_14 age15_19 age20_24 age25_29 age30_34
1      1 全部位 ICD-7   140-205 男女計   1958  95.53  6.936  4.088    3.864    5.087    6.688    11.78    22.69
2      1 全部位 ICD-7   140-205 男女計   1959  98.19  6.646  3.824    4.251    5.532    6.626    11.94    23.32
3      1 全部位 ICD-7   140-205 男女計   1960 100.38  7.700  4.161    4.411    5.597    7.165    11.35    22.55
4      1 全部位 ICD-7   140-205 男女計   1961 102.29  7.131  4.342    3.929    5.823    7.125    12.15    23.25
5      1 全部位 ICD-7   140-205 男女計   1962 103.20  7.502  4.119    4.898    6.133    7.934    12.75    22.99
6      1 全部位 ICD-7   140-205 男女計   1963 105.48  7.697  4.608    4.666    6.776    7.884    13.03    23.22
  age35_39 age40_44 age45_49 age50_54 age55_59 age60_64 age65_69 age70_74 age75_79 age80_84 age85plus
1    42.70    77.23    132.2    212.1    327.7    479.0    656.2    814.5    839.8    722.4     510.9
2    41.96    80.38    129.9    210.4    338.0    479.3    662.7    831.1    862.8    760.5     549.2
3    42.18    75.49    131.7    209.4    328.2    478.6    667.7    845.1    892.8    782.7     615.4
4    41.04    74.90    128.5    206.6    334.5    474.1    666.3    839.4    912.9    825.2     584.6
5    40.38    72.98    124.6    204.8    324.9    476.8    664.1    838.5    915.2    796.1     611.1
6    41.40    72.57    127.7    204.7    319.6    477.8    668.2    861.9    931.5    865.6     631.4

Graphing hepatic cancer data

## Extract the hepatic cancer mortality rows for both sexes combined
## (部位 == "肝臓" selects site = liver; 性別 == "男女計" selects the
## male-and-female total)
rate.hepatic <- subset(rate.all, 部位 == "肝臓" & 性別 == "男女計")

## Change to long format: the age* rate columns are stacked into
## Age_Range / Mortality_Rate. id.vars is left unspecified, so melt()
## treats every remaining column as an id variable.
library(reshape2)
rate.hepatic.melt <- melt(data          = rate.hepatic,
                          measure.vars  = grep("age", names(rate.hepatic), value = TRUE),
                          variable.name = "Age_Range",
                          value.name    = "Mortality_Rate"
                          )

## gsub("_", "-", as.character(rate.hepatic.melt$Age_Range))

## Regroup calendar year of death by five year intervals
## (right-closed, e.g. 1956-1960 falls in "(1955,1960]")
rate.hepatic.melt$Cal_yr5 <- cut(rate.hepatic.melt$Cal_yr, breaks = seq(from = 1955, to = 2015, by = 5))

## Create a variable representing the lowest age in the interval
## The factor's integer codes index the vector 0, 5, ..., 85; this relies
## on the Age_Range levels being in ascending age order (the order of the
## age* columns in the sheet)
rate.hepatic.melt$age <- seq(from = 0, to = 85, by = 5)[as.integer(rate.hepatic.melt$Age_Range)]

## Calculate the year of birth (approximate: uses the lowest age in the range)
rate.hepatic.melt$Birth_yr <- with(rate.hepatic.melt, Cal_yr - age)

## Create the year of birth categories
rate.hepatic.melt$Birth_yr5 <- cut(rate.hepatic.melt$Birth_yr, breaks = seq(from = 1870, to = 2015, by = 5))
## dig.lab = 4 keeps the labels as plain years, e.g. "(1930,1960]"; with
## the default of 3 significant digits they print as "(1.93e+03,1.96e+03]"
rate.hepatic.melt$Birth_yr30 <- cut(rate.hepatic.melt$Birth_yr,
                                    breaks  = seq(from = 1870, to = 2030, by = 30),
                                    dig.lab = 4)


## Check first 20 rows
head(rate.hepatic.melt, 20)
   コード 部位   ICD         ICDコード   性別 Cal_yr   粗率 Age_Range Mortality_Rate     Cal_yr5 age Birth_yr
1       8 肝臓 ICD-7        155.0, 156 男女計   1958  9.011    age0_4         0.2345 (1955,1960]   0     1958
2       8 肝臓 ICD-7        155.0, 156 男女計   1959  9.048    age0_4         0.1499 (1955,1960]   0     1959
3       8 肝臓 ICD-7        155.0, 156 男女計   1960  9.438    age0_4         0.2295 (1955,1960]   0     1960
4       8 肝臓 ICD-7        155.0, 156 男女計   1961  9.115    age0_4         0.2056 (1960,1965]   0     1961
5       8 肝臓 ICD-7        155.0, 156 男女計   1962  9.066    age0_4         0.2041 (1960,1965]   0     1962
6       8 肝臓 ICD-7        155.0, 156 男女計   1963  9.054    age0_4         0.3043 (1960,1965]   0     1963
7       8 肝臓 ICD-7        155.0, 156 男女計   1964  8.943    age0_4         0.2640 (1960,1965]   0     1964
8       8 肝臓 ICD-7        155.0, 156 男女計   1965  8.652    age0_4         0.3197 (1960,1965]   0     1965
9       8 肝臓 ICD-7        155.0, 156 男女計   1966  8.992    age0_4         0.2875 (1965,1970]   0     1966
10      8 肝臓 ICD-7        155.0, 156 男女計   1967  8.801    age0_4         0.2683 (1965,1970]   0     1967
11      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1968  8.941    age0_4         0.4041 (1965,1970]   0     1968
12      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1969  9.284    age0_4         0.3242 (1965,1970]   0     1969
13      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1970  9.228    age0_4         0.3201 (1965,1970]   0     1970
14      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1971  9.355    age0_4         0.3122 (1970,1975]   0     1971
15      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1972  9.256    age0_4         0.3171 (1970,1975]   0     1972
16      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1973  9.282    age0_4         0.3572 (1970,1975]   0     1973
17      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1974  9.246    age0_4         0.2713 (1970,1975]   0     1974
18      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1975  9.517    age0_4         0.3624 (1970,1975]   0     1975
19      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1976 10.222    age0_4         0.3273 (1975,1980]   0     1976
20      8 肝臓 ICD-8 155, 197.7, 197.8 男女計   1977 10.572    age0_4         0.2516 (1975,1980]   0     1977
     Birth_yr5          Birth_yr30
1  (1955,1960] (1.93e+03,1.96e+03]
2  (1955,1960] (1.93e+03,1.96e+03]
3  (1955,1960] (1.93e+03,1.96e+03]
4  (1960,1965] (1.96e+03,1.99e+03]
5  (1960,1965] (1.96e+03,1.99e+03]
6  (1960,1965] (1.96e+03,1.99e+03]
7  (1960,1965] (1.96e+03,1.99e+03]
8  (1960,1965] (1.96e+03,1.99e+03]
9  (1965,1970] (1.96e+03,1.99e+03]
10 (1965,1970] (1.96e+03,1.99e+03]
11 (1965,1970] (1.96e+03,1.99e+03]
12 (1965,1970] (1.96e+03,1.99e+03]
13 (1965,1970] (1.96e+03,1.99e+03]
14 (1970,1975] (1.96e+03,1.99e+03]
15 (1970,1975] (1.96e+03,1.99e+03]
16 (1970,1975] (1.96e+03,1.99e+03]
17 (1970,1975] (1.96e+03,1.99e+03]
18 (1970,1975] (1.96e+03,1.99e+03]
19 (1975,1980] (1.96e+03,1.99e+03]
20 (1975,1980] (1.96e+03,1.99e+03]

## Load ggplot2
library(ggplot2)

## Plot by year of birth (Birth_yr on the x axis), one coloured series
## per age range at death
## geom_point()/geom_line() replace bare layer(geom = ...) calls, which
## fail in current ggplot2 because layer() also needs stat and position
ggplot(data = rate.hepatic.melt,
       mapping = aes(x = Birth_yr, y = Mortality_Rate, color = Age_Range)) +
    geom_point() +
    geom_line() +
    labs(title = "Hepatic Cancer Mortality in Japan (Grouped by age at death)") +
    theme_bw() +
    ## Remove the legend key background and rotate the x axis labels
    theme(legend.key = element_blank(),
          axis.text.x = element_text(angle=90, vjust=1))

plot of chunk unnamed-chunk-3



## ?????????
## Plot by age at death, grouped by birth year
## ggplot(data = rate.hepatic.melt,
##        mapping = aes(x = Age_Range, y = Mortality_Rate, group = factor(Birth_yr), color = Birth_yr5)) + 
##     layer(geom = "point") +
##     layer(geom = "line") +
##     labs(title = "Hepatic Cancer Mortality in Japan (Grouped by age at death)") + 
##     theme_bw() +
##     theme(legend.key = element_blank(),
##           axis.text.x = element_text(angle=90, vjust=1))


## Plot by calendar year of death (Cal_yr on the x axis), one coloured
## series per age range at death
## geom_point()/geom_line() replace bare layer(geom = ...) calls, which
## fail in current ggplot2 because layer() also needs stat and position
ggplot(data = rate.hepatic.melt,
       mapping = aes(x = Cal_yr, y = Mortality_Rate, color = Age_Range)) +
    geom_point() +
    geom_line() +
    labs(title = "Hepatic Cancer Mortality in Japan") +
    theme_bw() +
    ## Remove the legend key background and rotate the x axis labels
    theme(legend.key = element_blank(),
          axis.text.x = element_text(angle=90, vjust=1))

plot of chunk unnamed-chunk-3