Age-Period-Cohort analysis using Epi package

References

Load Epi package

library(Epi)

Example included in help for rateplot

Bladder cancer mortality in Italian males

Description:
     Number of deaths from bladder cancer and person-years in the
     Italian male population 1955-1979, in ages 25-79.

Format:
     A data frame with 55 observations on the following 4 variables:
          ‘age’:  Age at death. Left endpoint of age class 
       ‘period’:  Period of death. Left endpoint of period 
            ‘D’:  Number of deaths                         
            ‘Y’:  Number of person-years.                  
data(blcaIT)
attach(blcaIT)

## Table of rates:
bl.rate <- tapply( D, list(age,period), sum ) /
    tapply( Y, list(age,period), sum )
bl.rate
        1955      1960      1965      1970      1975
25 0.0000003 0.0000003 0.0000001 0.0000004 0.0000012
30 0.0000017 0.0000018 0.0000012 0.0000008 0.0000009
35 0.0000032 0.0000031 0.0000035 0.0000042 0.0000032
40 0.0000104 0.0000105 0.0000091 0.0000104 0.0000127
45 0.0000286 0.0000252 0.0000261 0.0000304 0.0000316
50 0.0000664 0.0000703 0.0000643 0.0000646 0.0000847
55 0.0001271 0.0001339 0.0001459 0.0001464 0.0001638
60 0.0002011 0.0002398 0.0002669 0.0002755 0.0002853
65 0.0002440 0.0003316 0.0004212 0.0004777 0.0005037
70 0.0003281 0.0004231 0.0005287 0.0006601 0.0007464
75 0.0004554 0.0004794 0.0006205 0.0008465 0.0010421

## The four classical plots:
par( mfrow=c(2,2) )
rateplot( bl.rate*10^6 )

plot of chunk unnamed-chunk-3


## The labels on the vertical axis could be nicer:
rateplot( bl.rate*10^6, at=10^(-1:3), labels=c(0.1,1,10,100,1000) ) 

plot of chunk unnamed-chunk-3


## More bells an whistles
par( mfrow=c(1,3), mar=c(3,3,1,1), oma=c(0,3,0,0), mgp=c(3,1,0)/1.6 )
rateplot( bl.rate*10^6, ylab="", ann=TRUE, which=c("AC","PA","CA"),
         at=10^(-1:3), labels=c(0.1,1,10,100,1000),
         col=topo.colors(11), cex.ann=1.2 ) 

plot of chunk unnamed-chunk-3

Japanese hepatic cancer data

## Load XLConnect
library(XLConnect)

## From a newly created file with sheet 4 (rate data) only
## Sheet 4 of cancer_mortality(1958-2011).xls was exctracted as a new file.
rate.all <- readWorksheetFromFile("./cancer_mortality(1958-2011)sheet4.xls", sheet = 1)

## Change variable names
names(rate.all) <- gsub("X", "age", names(rate.all))
names(rate.all) <- gsub("歳", "", names(rate.all))
names(rate.all) <- gsub("以上", "plus", names(rate.all))
names(rate.all) <- gsub("死亡年", "Cal_yr", names(rate.all))
names(rate.all) <- gsub("\\.", "_", names(rate.all))

## Show data
head(rate.all)
  コード   部位   ICD ICDコード   性別 Cal_yr   粗率 age0_4 age5_9 age10_14 age15_19 age20_24 age25_29 age30_34
1      1 全部位 ICD-7   140-205 男女計   1958  95.53  6.936  4.088    3.864    5.087    6.688    11.78    22.69
2      1 全部位 ICD-7   140-205 男女計   1959  98.19  6.646  3.824    4.251    5.532    6.626    11.94    23.32
3      1 全部位 ICD-7   140-205 男女計   1960 100.38  7.700  4.161    4.411    5.597    7.165    11.35    22.55
4      1 全部位 ICD-7   140-205 男女計   1961 102.29  7.131  4.342    3.929    5.823    7.125    12.15    23.25
5      1 全部位 ICD-7   140-205 男女計   1962 103.20  7.502  4.119    4.898    6.133    7.934    12.75    22.99
6      1 全部位 ICD-7   140-205 男女計   1963 105.48  7.697  4.608    4.666    6.776    7.884    13.03    23.22
  age35_39 age40_44 age45_49 age50_54 age55_59 age60_64 age65_69 age70_74 age75_79 age80_84 age85plus
1    42.70    77.23    132.2    212.1    327.7    479.0    656.2    814.5    839.8    722.4     510.9
2    41.96    80.38    129.9    210.4    338.0    479.3    662.7    831.1    862.8    760.5     549.2
3    42.18    75.49    131.7    209.4    328.2    478.6    667.7    845.1    892.8    782.7     615.4
4    41.04    74.90    128.5    206.6    334.5    474.1    666.3    839.4    912.9    825.2     584.6
5    40.38    72.98    124.6    204.8    324.9    476.8    664.1    838.5    915.2    796.1     611.1
6    41.40    72.57    127.7    204.7    319.6    477.8    668.2    861.9    931.5    865.6     631.4

## Extract all-sex data hepatic cancer mortality data
rate.hepatic <- subset(rate.all, 部位 == "肝臓" & 性別 == "男女計")


## Crate a matrix
mat.rate.hepatic <- t(rate.hepatic[,c("age0_4","age5_9","age10_14","age15_19","age20_24","age25_29","age30_34","age35_39","age40_44","age45_49","age50_54","age55_59","age60_64","age65_69","age70_74","age75_79","age80_84","age85plus")])

## Put calendar years in the column names
colnames(mat.rate.hepatic) <- rate.hepatic$Cal_yr


## Manipulate age rage
age.range <- rownames(mat.rate.hepatic)
age.range <- gsub("age","", age.range)
age.range <- gsub("_", " + ", age.range)
age.range <- gsub("plus", " + 85", age.range)
age.range
 [1] "0 + 4"   "5 + 9"   "10 + 14" "15 + 19" "20 + 24" "25 + 29" "30 + 34" "35 + 39" "40 + 44" "45 + 49" "50 + 54"
[12] "55 + 59" "60 + 64" "65 + 69" "70 + 74" "75 + 79" "80 + 84" "85 + 85"

## Obtain mean age in each range
age.mean <- sapply(age.range, function(x) {

    eval(parse(text = x)) / 2
})

## Add as row names
rownames(mat.rate.hepatic) <- age.mean

## Show partially
mat.rate.hepatic[, paste(seq(1960,2010,10))]
       1960     1970     1980      1990      2000      2010
2   0.22946  0.32014  0.28375   0.17002   0.17065   0.09516
7   0.06518  0.12346  0.05017   0.01345   0.05013   0.01802
12  0.08169  0.07693  0.08988   0.03531   0.03074   0.03399
17  0.13966  0.07779  0.04869   0.05016   0.02691   0.03318
22  0.32458  0.22652  0.12847   0.11466   0.10843   0.04758
27  0.40198  0.34303  0.26735   0.20059   0.31165   0.12579
32  0.93112  0.79254  0.82177   0.53157   0.47625   0.31653
37  2.20271  2.16622  2.09810   1.66557   1.01528   0.56771
42  5.18018  4.54432  4.29120   4.24765   2.73807   1.32939
47 10.06943  8.52781 12.28622   8.94361   7.04316   3.38935
52 20.58842 15.78405 22.04917  20.22650  15.86950   8.26727
57 31.58293 25.58100 30.22036  53.78691  29.80990  17.07475
62 50.17709 36.46980 42.85826  69.55507  54.35962  30.41402
67 65.95995 55.62109 54.10874  74.21127 101.76850  48.16395
72 90.10081 72.42389 75.09658  85.25292 113.61634  75.43588
77 98.77676 91.39815 86.66450  96.46794 114.81877 120.40739
82 77.03059 81.88417 96.87152 113.44529 123.85519 131.75886
85 68.08511 74.21364 79.86193 123.15040 118.29985 129.36414


## Four plots in one
layout(matrix(1:4, byrow = T, ncol = 2))

## Hepatic Cancer Mortality four plots at a time
## rates: A two-dimensional table (or array) with rates to be plotted.
##        It is assumed that the first dimension is age and the second
##        is period.
rateplot(rates = mat.rate.hepatic,

         ## ap: Cross-sectional age distribution
         ## ac: Longitudinal effect of age
         ## pa: Abnormal survey year detection
         ## ca: Abnormal birth cohort detection
         which = c("ap","ac","pa","ca"),

         a.lab = "Age at death",
         p.lab = "Year of death",
         c.lab = "Year of birth",

         lwd = 1,

         ann = TRUE,

         log.ax = "")

plot of chunk unnamed-chunk-4

From the left upper corner to right, then to below

   which: A character vector with elements from
          ‘c("ap","ac","apc","pa","ca")’, indication which plots should
          be produced. One plot per element is produced. The first
          letter indicates the x-axis of the plot, the remaining which
          groups should be connected, i.e. ‘"pa"’ will plot rates
          versus period and connect age-classes, and ‘"apc"’ will plot
          rates versus age, and connect both periods and cohorts.

Japanese lung cancer data (male)

## Extract all-sex data hepatic cancer mortality data
rate.lung <- subset(rate.all, 部位 == "肺" & 性別 == "男")

## Crate a matrix
mat.rate.lung <- t(rate.lung[,c("age0_4","age5_9","age10_14","age15_19","age20_24","age25_29","age30_34","age35_39","age40_44","age45_49","age50_54","age55_59","age60_64","age65_69","age70_74","age75_79","age80_84","age85plus")])

## Put calendar years in the column names
colnames(mat.rate.lung) <- rate.lung$Cal_yr

## Add as row names (previously made)
rownames(mat.rate.lung) <- age.mean

## Show partially
mat.rate.lung[, paste(seq(1960,2010,10))]
       1960      1970      1980      1990      2000      2010
2   0.04984   0.02231   0.04612   0.00000   0.00000   0.00000
7   0.02127   0.00000   0.00000   0.00000   0.03261   0.00000
12  0.00000   0.00000   0.04382   0.00000   0.02999   0.00000
17  0.17102   0.11017   0.09535   0.01958   0.05251   0.06459
22  0.24241   0.20835   0.10173   0.11267   0.11751   0.15487
27  0.36633   0.44538   0.46530   0.42124   0.20431   0.19215
32  0.69391   0.76945   0.87225   1.00208   1.28275   0.47847
37  1.12188   1.60858   2.49522   3.28905   2.40387   1.90799
42  2.72606   4.55118   5.02673   6.75017   7.44315   3.92528
47  7.66571   8.46862  11.20324  12.92495  14.29215  10.80842
52 15.77910  20.14121  26.36474  22.97684  28.24641  24.32130
57 30.46307  40.66644  48.83686  53.36462  51.70003  50.62214
62 46.53673  71.53334  96.02142 112.97150  89.62586  92.18574
67 71.37342 124.95873 157.16734 190.24189 173.14455 163.20160
72 86.94198 156.62663 252.72348 300.33676 307.23470 237.13111
77 81.76137 160.90044 329.13948 419.71227 451.91118 384.39454
82 62.07728 127.01470 321.35589 514.98755 596.15783 595.99186
85 37.19909  77.60657 242.34092 497.81315 637.36220 742.43968


## Four plots in one
layout(matrix(1:4, byrow = T, ncol = 2))

## Hepatic Cancer Mortality four plots at a time
## rates: A two-dimensional table (or array) with rates to be plotted.
##        It is assumed that the first dimension is age and the second
##        is period.
rateplot(rates = mat.rate.lung,

         ## ap: Cross-sectional age distribution
         ## ac: Longitudinal effect of age
         ## pa: Abnormal survey year detection
         ## ca: Abnormal birth cohort detection
         which = c("ap","ac","pa","ca"),

         a.lab = "Age at death",
         p.lab = "Year of death",
         c.lab = "Year of birth",

         lwd = 1,

         ann = TRUE,

         log.ax = "")

plot of chunk unnamed-chunk-5

Japanese lung cancer data (female)

## Extract all-sex data hepatic cancer mortality data
rate.lung <- subset(rate.all, 部位 == "肺" & 性別 == "女")

## Crate a matrix
mat.rate.lung <- t(rate.lung[,c("age0_4","age5_9","age10_14","age15_19","age20_24","age25_29","age30_34","age35_39","age40_44","age45_49","age50_54","age55_59","age60_64","age65_69","age70_74","age75_79","age80_84","age85plus")])

## Put calendar years in the column names
colnames(mat.rate.lung) <- rate.lung$Cal_yr

## Add as row names (previously made)
rownames(mat.rate.lung) <- age.mean

## Show partially
mat.rate.lung[, paste(seq(1960,2010,10))]
       1960     1970     1980      1990      2000      2010
2   0.02610  0.00000  0.02426   0.00000   0.03499   0.07796
7   0.02221  0.00000  0.00000   0.00000   0.00000   0.00000
12  0.01853  0.02616  0.00000   0.00000   0.00000   0.03484
17  0.10797  0.08969  0.00000   0.02058   0.00000   0.03410
22  0.16694  0.13169  0.05192   0.00000   0.00000   0.09752
27  0.14582  0.24194  0.26883   0.22838   0.19020   0.08543
32  0.84860  0.83956  0.77064   0.70660   0.73057   0.54537
37  1.25198  1.20455  1.70216   1.97925   1.87679   1.17613
42  1.78520  2.89743  2.90994   3.10345   3.84475   2.34260
47  4.92235  5.18403  5.12236   5.68876   5.87374   4.48017
52  6.38677  8.00126  9.59027  10.39815  12.75819   8.57679
57 10.49469 14.20141 15.89865  15.66537  18.15184  15.89626
62 16.73312 24.54325 27.37435  25.27686  26.89083  27.98076
67 22.05735 32.01656 42.11194  42.73573  42.84688  42.77963
72 23.32695 41.30249 63.70532  69.27759  62.57549  61.59300
77 25.43376 46.65860 84.38975  99.61830  97.63934  90.76499
82 19.75900 47.60035 90.44219 119.83452 144.60732 134.01760
85 15.96388 25.70520 67.83405 150.22711 195.44655 210.57449


## Four plots in one
layout(matrix(1:4, byrow = T, ncol = 2))

## Hepatic Cancer Mortality four plots at a time
## rates: A two-dimensional table (or array) with rates to be plotted.
##        It is assumed that the first dimension is age and the second
##        is period.
rateplot(rates = mat.rate.lung,

         ## ap: Cross-sectional age distribution
         ## ac: Longitudinal effect of age
         ## pa: Abnormal survey year detection
         ## ca: Abnormal birth cohort detection
         which = c("ap","ac","pa","ca"),

         a.lab = "Age at death",
         p.lab = "Year of death",
         c.lab = "Year of birth",

         lwd = 1,

         ann = TRUE,

         log.ax = "")

plot of chunk unnamed-chunk-6