library(Epi)
Bladder cancer mortality in Italian males
Description:
Number of deaths from bladder cancer and person-years in the
Italian male population 1955-1979, in ages 25-79.
Format:
A data frame with 55 observations on the following 4 variables:
‘age’: Age at death. Left endpoint of age class
‘period’: Period of death. Left endpoint of period
‘D’: Number of deaths
‘Y’: Number of person-years.
data(blcaIT)
attach(blcaIT)
## Table of rates:
bl.rate <- tapply( D, list(age,period), sum ) /
tapply( Y, list(age,period), sum )
bl.rate
1955 1960 1965 1970 1975
25 0.0000003 0.0000003 0.0000001 0.0000004 0.0000012
30 0.0000017 0.0000018 0.0000012 0.0000008 0.0000009
35 0.0000032 0.0000031 0.0000035 0.0000042 0.0000032
40 0.0000104 0.0000105 0.0000091 0.0000104 0.0000127
45 0.0000286 0.0000252 0.0000261 0.0000304 0.0000316
50 0.0000664 0.0000703 0.0000643 0.0000646 0.0000847
55 0.0001271 0.0001339 0.0001459 0.0001464 0.0001638
60 0.0002011 0.0002398 0.0002669 0.0002755 0.0002853
65 0.0002440 0.0003316 0.0004212 0.0004777 0.0005037
70 0.0003281 0.0004231 0.0005287 0.0006601 0.0007464
75 0.0004554 0.0004794 0.0006205 0.0008465 0.0010421
## The four classical plots:
par( mfrow=c(2,2) )
rateplot( bl.rate*10^6 )
## The labels on the vertical axis could be nicer:
rateplot( bl.rate*10^6, at=10^(-1:3), labels=c(0.1,1,10,100,1000) )
## More bells an whistles
par( mfrow=c(1,3), mar=c(3,3,1,1), oma=c(0,3,0,0), mgp=c(3,1,0)/1.6 )
rateplot( bl.rate*10^6, ylab="", ann=TRUE, which=c("AC","PA","CA"),
at=10^(-1:3), labels=c(0.1,1,10,100,1000),
col=topo.colors(11), cex.ann=1.2 )
See also the ggplot version: http://rpubs.com/kaz_yos/epi-cohort-effect
Dr Sadao Suzuki’s time effect materials
Epidemiology: Beyond the Basics: http://books.google.com/books/about/Epidemiology.html?id=TuJrwZEIY3UC
がん情報サービス(Cancer Info Service): http://ganjoho.jp/professional/statistics/statistics.html
Vital Statistics Japan (Ministry of Health, Labour and Welfare)
Matsuda A, Matsuda T, Shibata A, Katanoda K, Sobue T, Nishimoto H and The Japan Cancer Surveillance Research Group. Cancer Incidence and Incidence Rates in Japan in 2007: A Study of 21 Population-based Cancer Registries for the Monitoring of Cancer Incidence in Japan (MCIJ) Project. Japanese Journal of Clinical Oncology, 43(3): 328-336, 2013
## Load XLConnect
library(XLConnect)
## From a newly created file with sheet 4 (rate data) only
## Sheet 4 of cancer_mortality(1958-2011).xls was exctracted as a new file.
rate.all <- readWorksheetFromFile("./cancer_mortality(1958-2011)sheet4.xls", sheet = 1)
## Change variable names
names(rate.all) <- gsub("X", "age", names(rate.all))
names(rate.all) <- gsub("歳", "", names(rate.all))
names(rate.all) <- gsub("以上", "plus", names(rate.all))
names(rate.all) <- gsub("死亡年", "Cal_yr", names(rate.all))
names(rate.all) <- gsub("\\.", "_", names(rate.all))
## Show data
head(rate.all)
コード 部位 ICD ICDコード 性別 Cal_yr 粗率 age0_4 age5_9 age10_14 age15_19 age20_24 age25_29 age30_34
1 1 全部位 ICD-7 140-205 男女計 1958 95.53 6.936 4.088 3.864 5.087 6.688 11.78 22.69
2 1 全部位 ICD-7 140-205 男女計 1959 98.19 6.646 3.824 4.251 5.532 6.626 11.94 23.32
3 1 全部位 ICD-7 140-205 男女計 1960 100.38 7.700 4.161 4.411 5.597 7.165 11.35 22.55
4 1 全部位 ICD-7 140-205 男女計 1961 102.29 7.131 4.342 3.929 5.823 7.125 12.15 23.25
5 1 全部位 ICD-7 140-205 男女計 1962 103.20 7.502 4.119 4.898 6.133 7.934 12.75 22.99
6 1 全部位 ICD-7 140-205 男女計 1963 105.48 7.697 4.608 4.666 6.776 7.884 13.03 23.22
age35_39 age40_44 age45_49 age50_54 age55_59 age60_64 age65_69 age70_74 age75_79 age80_84 age85plus
1 42.70 77.23 132.2 212.1 327.7 479.0 656.2 814.5 839.8 722.4 510.9
2 41.96 80.38 129.9 210.4 338.0 479.3 662.7 831.1 862.8 760.5 549.2
3 42.18 75.49 131.7 209.4 328.2 478.6 667.7 845.1 892.8 782.7 615.4
4 41.04 74.90 128.5 206.6 334.5 474.1 666.3 839.4 912.9 825.2 584.6
5 40.38 72.98 124.6 204.8 324.9 476.8 664.1 838.5 915.2 796.1 611.1
6 41.40 72.57 127.7 204.7 319.6 477.8 668.2 861.9 931.5 865.6 631.4
## Extract all-sex data hepatic cancer mortality data
rate.hepatic <- subset(rate.all, 部位 == "肝臓" & 性別 == "男女計")
## Crate a matrix
mat.rate.hepatic <- t(rate.hepatic[,c("age0_4","age5_9","age10_14","age15_19","age20_24","age25_29","age30_34","age35_39","age40_44","age45_49","age50_54","age55_59","age60_64","age65_69","age70_74","age75_79","age80_84","age85plus")])
## Put calendar years in the column names
colnames(mat.rate.hepatic) <- rate.hepatic$Cal_yr
## Manipulate age rage
age.range <- rownames(mat.rate.hepatic)
age.range <- gsub("age","", age.range)
age.range <- gsub("_", " + ", age.range)
age.range <- gsub("plus", " + 85", age.range)
age.range
[1] "0 + 4" "5 + 9" "10 + 14" "15 + 19" "20 + 24" "25 + 29" "30 + 34" "35 + 39" "40 + 44" "45 + 49" "50 + 54"
[12] "55 + 59" "60 + 64" "65 + 69" "70 + 74" "75 + 79" "80 + 84" "85 + 85"
## Obtain mean age in each range
age.mean <- sapply(age.range, function(x) {
eval(parse(text = x)) / 2
})
## Add as row names
rownames(mat.rate.hepatic) <- age.mean
## Show partially
mat.rate.hepatic[, paste(seq(1960,2010,10))]
1960 1970 1980 1990 2000 2010
2 0.22946 0.32014 0.28375 0.17002 0.17065 0.09516
7 0.06518 0.12346 0.05017 0.01345 0.05013 0.01802
12 0.08169 0.07693 0.08988 0.03531 0.03074 0.03399
17 0.13966 0.07779 0.04869 0.05016 0.02691 0.03318
22 0.32458 0.22652 0.12847 0.11466 0.10843 0.04758
27 0.40198 0.34303 0.26735 0.20059 0.31165 0.12579
32 0.93112 0.79254 0.82177 0.53157 0.47625 0.31653
37 2.20271 2.16622 2.09810 1.66557 1.01528 0.56771
42 5.18018 4.54432 4.29120 4.24765 2.73807 1.32939
47 10.06943 8.52781 12.28622 8.94361 7.04316 3.38935
52 20.58842 15.78405 22.04917 20.22650 15.86950 8.26727
57 31.58293 25.58100 30.22036 53.78691 29.80990 17.07475
62 50.17709 36.46980 42.85826 69.55507 54.35962 30.41402
67 65.95995 55.62109 54.10874 74.21127 101.76850 48.16395
72 90.10081 72.42389 75.09658 85.25292 113.61634 75.43588
77 98.77676 91.39815 86.66450 96.46794 114.81877 120.40739
82 77.03059 81.88417 96.87152 113.44529 123.85519 131.75886
85 68.08511 74.21364 79.86193 123.15040 118.29985 129.36414
## Four plots in one
layout(matrix(1:4, byrow = T, ncol = 2))
## Hepatic Cancer Mortality four plots at a time
## rates: A two-dimensional table (or array) with rates to be plotted.
## It is assumed that the first dimension is age and the second
## is period.
rateplot(rates = mat.rate.hepatic,
## ap: Cross-sectional age distribution
## ac: Longitudinal effect of age
## pa: Abnormal survey year detection
## ca: Abnormal birth cohort detection
which = c("ap","ac","pa","ca"),
a.lab = "Age at death",
p.lab = "Year of death",
c.lab = "Year of birth",
lwd = 1,
ann = TRUE,
log.ax = "")
From the left upper corner to right, then to below
which: A character vector with elements from
‘c("ap","ac","apc","pa","ca")’, indication which plots should
be produced. One plot per element is produced. The first
letter indicates the x-axis of the plot, the remaining which
groups should be connected, i.e. ‘"pa"’ will plot rates
versus period and connect age-classes, and ‘"apc"’ will plot
rates versus age, and connect both periods and cohorts.
## Extract all-sex data hepatic cancer mortality data
rate.lung <- subset(rate.all, 部位 == "肺" & 性別 == "男")
## Crate a matrix
mat.rate.lung <- t(rate.lung[,c("age0_4","age5_9","age10_14","age15_19","age20_24","age25_29","age30_34","age35_39","age40_44","age45_49","age50_54","age55_59","age60_64","age65_69","age70_74","age75_79","age80_84","age85plus")])
## Put calendar years in the column names
colnames(mat.rate.lung) <- rate.lung$Cal_yr
## Add as row names (previously made)
rownames(mat.rate.lung) <- age.mean
## Show partially
mat.rate.lung[, paste(seq(1960,2010,10))]
1960 1970 1980 1990 2000 2010
2 0.04984 0.02231 0.04612 0.00000 0.00000 0.00000
7 0.02127 0.00000 0.00000 0.00000 0.03261 0.00000
12 0.00000 0.00000 0.04382 0.00000 0.02999 0.00000
17 0.17102 0.11017 0.09535 0.01958 0.05251 0.06459
22 0.24241 0.20835 0.10173 0.11267 0.11751 0.15487
27 0.36633 0.44538 0.46530 0.42124 0.20431 0.19215
32 0.69391 0.76945 0.87225 1.00208 1.28275 0.47847
37 1.12188 1.60858 2.49522 3.28905 2.40387 1.90799
42 2.72606 4.55118 5.02673 6.75017 7.44315 3.92528
47 7.66571 8.46862 11.20324 12.92495 14.29215 10.80842
52 15.77910 20.14121 26.36474 22.97684 28.24641 24.32130
57 30.46307 40.66644 48.83686 53.36462 51.70003 50.62214
62 46.53673 71.53334 96.02142 112.97150 89.62586 92.18574
67 71.37342 124.95873 157.16734 190.24189 173.14455 163.20160
72 86.94198 156.62663 252.72348 300.33676 307.23470 237.13111
77 81.76137 160.90044 329.13948 419.71227 451.91118 384.39454
82 62.07728 127.01470 321.35589 514.98755 596.15783 595.99186
85 37.19909 77.60657 242.34092 497.81315 637.36220 742.43968
## Four plots in one
layout(matrix(1:4, byrow = T, ncol = 2))
## Hepatic Cancer Mortality four plots at a time
## rates: A two-dimensional table (or array) with rates to be plotted.
## It is assumed that the first dimension is age and the second
## is period.
rateplot(rates = mat.rate.lung,
## ap: Cross-sectional age distribution
## ac: Longitudinal effect of age
## pa: Abnormal survey year detection
## ca: Abnormal birth cohort detection
which = c("ap","ac","pa","ca"),
a.lab = "Age at death",
p.lab = "Year of death",
c.lab = "Year of birth",
lwd = 1,
ann = TRUE,
log.ax = "")
## Extract all-sex data hepatic cancer mortality data
rate.lung <- subset(rate.all, 部位 == "肺" & 性別 == "女")
## Crate a matrix
mat.rate.lung <- t(rate.lung[,c("age0_4","age5_9","age10_14","age15_19","age20_24","age25_29","age30_34","age35_39","age40_44","age45_49","age50_54","age55_59","age60_64","age65_69","age70_74","age75_79","age80_84","age85plus")])
## Put calendar years in the column names
colnames(mat.rate.lung) <- rate.lung$Cal_yr
## Add as row names (previously made)
rownames(mat.rate.lung) <- age.mean
## Show partially
mat.rate.lung[, paste(seq(1960,2010,10))]
1960 1970 1980 1990 2000 2010
2 0.02610 0.00000 0.02426 0.00000 0.03499 0.07796
7 0.02221 0.00000 0.00000 0.00000 0.00000 0.00000
12 0.01853 0.02616 0.00000 0.00000 0.00000 0.03484
17 0.10797 0.08969 0.00000 0.02058 0.00000 0.03410
22 0.16694 0.13169 0.05192 0.00000 0.00000 0.09752
27 0.14582 0.24194 0.26883 0.22838 0.19020 0.08543
32 0.84860 0.83956 0.77064 0.70660 0.73057 0.54537
37 1.25198 1.20455 1.70216 1.97925 1.87679 1.17613
42 1.78520 2.89743 2.90994 3.10345 3.84475 2.34260
47 4.92235 5.18403 5.12236 5.68876 5.87374 4.48017
52 6.38677 8.00126 9.59027 10.39815 12.75819 8.57679
57 10.49469 14.20141 15.89865 15.66537 18.15184 15.89626
62 16.73312 24.54325 27.37435 25.27686 26.89083 27.98076
67 22.05735 32.01656 42.11194 42.73573 42.84688 42.77963
72 23.32695 41.30249 63.70532 69.27759 62.57549 61.59300
77 25.43376 46.65860 84.38975 99.61830 97.63934 90.76499
82 19.75900 47.60035 90.44219 119.83452 144.60732 134.01760
85 15.96388 25.70520 67.83405 150.22711 195.44655 210.57449
## Four plots in one
layout(matrix(1:4, byrow = T, ncol = 2))
## Hepatic Cancer Mortality four plots at a time
## rates: A two-dimensional table (or array) with rates to be plotted.
## It is assumed that the first dimension is age and the second
## is period.
rateplot(rates = mat.rate.lung,
## ap: Cross-sectional age distribution
## ac: Longitudinal effect of age
## pa: Abnormal survey year detection
## ca: Abnormal birth cohort detection
which = c("ap","ac","pa","ca"),
a.lab = "Age at death",
p.lab = "Year of death",
c.lab = "Year of birth",
lwd = 1,
ann = TRUE,
log.ax = "")