Dr Sadao Suzuki’s time effect materials
Epidemiology: Beyond the Basics: http://books.google.com/books/about/Epidemiology.html?id=TuJrwZEIY3UC
がん情報サービス(Cancer Info Service): http://ganjoho.jp/professional/statistics/statistics.html
Vital Statistics Japan (Ministry of Health, Labour and Welfare)
Matsuda A, Matsuda T, Shibata A, Katanoda K, Sobue T, Nishimoto H and The Japan Cancer Surveillance Research Group. Cancer Incidence and Incidence Rates in Japan in 2007: A Study of 21 Population-based Cancer Registries for the Monitoring of Cancer Incidence in Japan (MCIJ) Project. Japanese Journal of Clinical Oncology, 43(3): 328-336, 2013
Data source: the file cancer_mortality(1958-2011).xls, published by the Center for Cancer Control and Information Services, National Cancer Center, Japan, was used for the analysis below.
## Load XLConnect (provides readWorksheetFromFile() for reading .xls files)
library(XLConnect)
## Read the rate data from a newly created workbook that holds only sheet 4
## (rate data) of the original file, so it is read here as sheet 1
rate.all <- readWorksheetFromFile("./cancer_mortality(1958-2011)sheet4.xls", sheet = 1)
## Change variable names. The substitutions are applied in this order to
## every column name:
##   leading "X" -> "age"     (age-band columns, e.g. age0_4)
##   "歳"        -> ""        (suffix on the age-band columns)
##   "以上"      -> "plus"    (open-ended top band, e.g. age85plus)
##   "死亡年"    -> "Cal_yr"  (calendar year of death)
##   "."         -> "_"
## "X" is anchored to the start of the name so that a capital X appearing
## elsewhere in a column name is never rewritten; the remaining patterns are
## literal text, hence fixed = TRUE.
names(rate.all) <- sub("^X", "age", names(rate.all))
names(rate.all) <- gsub("歳", "", names(rate.all), fixed = TRUE)
names(rate.all) <- gsub("以上", "plus", names(rate.all), fixed = TRUE)
names(rate.all) <- gsub("死亡年", "Cal_yr", names(rate.all), fixed = TRUE)
names(rate.all) <- gsub(".", "_", names(rate.all), fixed = TRUE)
## Show the first rows to confirm the import and the new column names
head(rate.all)
コード 部位 ICD ICDコード 性別 Cal_yr 粗率 age0_4 age5_9 age10_14 age15_19 age20_24 age25_29 age30_34
1 1 全部位 ICD-7 140-205 男女計 1958 95.53 6.936 4.088 3.864 5.087 6.688 11.78 22.69
2 1 全部位 ICD-7 140-205 男女計 1959 98.19 6.646 3.824 4.251 5.532 6.626 11.94 23.32
3 1 全部位 ICD-7 140-205 男女計 1960 100.38 7.700 4.161 4.411 5.597 7.165 11.35 22.55
4 1 全部位 ICD-7 140-205 男女計 1961 102.29 7.131 4.342 3.929 5.823 7.125 12.15 23.25
5 1 全部位 ICD-7 140-205 男女計 1962 103.20 7.502 4.119 4.898 6.133 7.934 12.75 22.99
6 1 全部位 ICD-7 140-205 男女計 1963 105.48 7.697 4.608 4.666 6.776 7.884 13.03 23.22
age35_39 age40_44 age45_49 age50_54 age55_59 age60_64 age65_69 age70_74 age75_79 age80_84 age85plus
1 42.70 77.23 132.2 212.1 327.7 479.0 656.2 814.5 839.8 722.4 510.9
2 41.96 80.38 129.9 210.4 338.0 479.3 662.7 831.1 862.8 760.5 549.2
3 42.18 75.49 131.7 209.4 328.2 478.6 667.7 845.1 892.8 782.7 615.4
4 41.04 74.90 128.5 206.6 334.5 474.1 666.3 839.4 912.9 825.2 584.6
5 40.38 72.98 124.6 204.8 324.9 476.8 664.1 838.5 915.2 796.1 611.1
6 41.40 72.57 127.7 204.7 319.6 477.8 668.2 861.9 931.5 865.6 631.4
## Extract hepatic cancer mortality data for both sexes combined
## (site 部位 == "肝臓", sex 性別 == "男女計")
rate.hepatic <- subset(rate.all, 部位 == "肝臓" & 性別 == "男女計")
## Change to long format: each age-band column becomes a set of rows, with the
## column name stored in Age_Range and the cell value in Mortality_Rate
library(reshape2)
rate.hepatic.melt <- melt(
  data = rate.hepatic,
  ## id.vars is deliberately omitted: melt() then keeps every non-measured
  ## column as an id variable
  measure.vars = grep("^age", names(rate.hepatic), value = TRUE),
  variable.name = "Age_Range",
  value.name = "Mortality_Rate"
)
## Possible relabelling of the age bands, kept for reference:
## gsub("_", "-", as.character(rate.hepatic.melt$Age_Range))
## Regroup calendar year of death by five year intervals.
## cut() intervals are right-closed, so e.g. 1960 falls in (1955,1960].
rate.hepatic.melt$Cal_yr5 <- cut(
  rate.hepatic.melt$Cal_yr,
  breaks = seq(from = 1955, to = 2015, by = 5)
)
## Create a variable representing the lowest age in the interval.
## The value is parsed from the age-band label itself ("age0_4" -> 0,
## "age85plus" -> 85) rather than looked up by factor code, so it does not
## depend on the order of the age columns in the input.
rate.hepatic.melt$age <- as.integer(
  sub("^age([0-9]+).*$", "\\1", as.character(rate.hepatic.melt$Age_Range))
)
## Calculate the year of birth (approximate: it uses the lowest age of the
## band, so it is the latest possible birth year for that band)
rate.hepatic.melt$Birth_yr <- with(rate.hepatic.melt, Cal_yr - age)
## Create the year of birth categories
rate.hepatic.melt$Birth_yr5 <- cut(
  rate.hepatic.melt$Birth_yr,
  breaks = seq(from = 1870, to = 2015, by = 5)
)
## dig.lab = 4 keeps the labels as plain years, e.g. (1930,1960]; with the
## default of 3 significant digits they print as (1.93e+03,1.96e+03]
rate.hepatic.melt$Birth_yr30 <- cut(
  rate.hepatic.melt$Birth_yr,
  breaks = seq(from = 1870, to = 2030, by = 30),
  dig.lab = 4
)
## Check first 20 rows
head(rate.hepatic.melt, 20)
コード 部位 ICD ICDコード 性別 Cal_yr 粗率 Age_Range Mortality_Rate Cal_yr5 age Birth_yr
1 8 肝臓 ICD-7 155.0, 156 男女計 1958 9.011 age0_4 0.2345 (1955,1960] 0 1958
2 8 肝臓 ICD-7 155.0, 156 男女計 1959 9.048 age0_4 0.1499 (1955,1960] 0 1959
3 8 肝臓 ICD-7 155.0, 156 男女計 1960 9.438 age0_4 0.2295 (1955,1960] 0 1960
4 8 肝臓 ICD-7 155.0, 156 男女計 1961 9.115 age0_4 0.2056 (1960,1965] 0 1961
5 8 肝臓 ICD-7 155.0, 156 男女計 1962 9.066 age0_4 0.2041 (1960,1965] 0 1962
6 8 肝臓 ICD-7 155.0, 156 男女計 1963 9.054 age0_4 0.3043 (1960,1965] 0 1963
7 8 肝臓 ICD-7 155.0, 156 男女計 1964 8.943 age0_4 0.2640 (1960,1965] 0 1964
8 8 肝臓 ICD-7 155.0, 156 男女計 1965 8.652 age0_4 0.3197 (1960,1965] 0 1965
9 8 肝臓 ICD-7 155.0, 156 男女計 1966 8.992 age0_4 0.2875 (1965,1970] 0 1966
10 8 肝臓 ICD-7 155.0, 156 男女計 1967 8.801 age0_4 0.2683 (1965,1970] 0 1967
11 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1968 8.941 age0_4 0.4041 (1965,1970] 0 1968
12 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1969 9.284 age0_4 0.3242 (1965,1970] 0 1969
13 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1970 9.228 age0_4 0.3201 (1965,1970] 0 1970
14 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1971 9.355 age0_4 0.3122 (1970,1975] 0 1971
15 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1972 9.256 age0_4 0.3171 (1970,1975] 0 1972
16 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1973 9.282 age0_4 0.3572 (1970,1975] 0 1973
17 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1974 9.246 age0_4 0.2713 (1970,1975] 0 1974
18 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1975 9.517 age0_4 0.3624 (1970,1975] 0 1975
19 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1976 10.222 age0_4 0.3273 (1975,1980] 0 1976
20 8 肝臓 ICD-8 155, 197.7, 197.8 男女計 1977 10.572 age0_4 0.2516 (1975,1980] 0 1977
Birth_yr5 Birth_yr30
1 (1955,1960] (1.93e+03,1.96e+03]
2 (1955,1960] (1.93e+03,1.96e+03]
3 (1955,1960] (1.93e+03,1.96e+03]
4 (1960,1965] (1.96e+03,1.99e+03]
5 (1960,1965] (1.96e+03,1.99e+03]
6 (1960,1965] (1.96e+03,1.99e+03]
7 (1960,1965] (1.96e+03,1.99e+03]
8 (1960,1965] (1.96e+03,1.99e+03]
9 (1965,1970] (1.96e+03,1.99e+03]
10 (1965,1970] (1.96e+03,1.99e+03]
11 (1965,1970] (1.96e+03,1.99e+03]
12 (1965,1970] (1.96e+03,1.99e+03]
13 (1965,1970] (1.96e+03,1.99e+03]
14 (1970,1975] (1.96e+03,1.99e+03]
15 (1970,1975] (1.96e+03,1.99e+03]
16 (1970,1975] (1.96e+03,1.99e+03]
17 (1970,1975] (1.96e+03,1.99e+03]
18 (1970,1975] (1.96e+03,1.99e+03]
19 (1975,1980] (1.96e+03,1.99e+03]
20 (1975,1980] (1.96e+03,1.99e+03]
## Load ggplot2
library(ggplot2)
## Plot by year of birth, one coloured series per age band at death
## (x = Birth_yr, colour = Age_Range).
## geom_point()/geom_line() are used instead of layer(geom = ...): layer()
## called without explicit stat and position arguments is rejected by
## ggplot2 2.0.0 and later.
ggplot(data = rate.hepatic.melt,
       mapping = aes(x = Birth_yr, y = Mortality_Rate, color = Age_Range)) +
  geom_point() +
  geom_line() +
  labs(title = "Hepatic Cancer Mortality in Japan (Grouped by age at death)") +
  theme_bw() +
  ## Drop the legend key background and rotate the x-axis labels
  theme(legend.key = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 1))
## ?????????
## Plot by age at death, grouped by birth year
## ggplot(data = rate.hepatic.melt,
## mapping = aes(x = Age_Range, y = Mortality_Rate, group = factor(Birth_yr), color = Birth_yr5)) +
## layer(geom = "point") +
## layer(geom = "line") +
## labs(title = "Hepatic Cancer Mortality in Japan (Grouped by age at death)") +
## theme_bw() +
## theme(legend.key = element_blank(),
## axis.text.x = element_text(angle=90, vjust=1))
## Plot by calendar year of death, one coloured series per age band at death
## (x = Cal_yr, colour = Age_Range).
## geom_point()/geom_line() are used instead of layer(geom = ...): layer()
## called without explicit stat and position arguments is rejected by
## ggplot2 2.0.0 and later.
ggplot(data = rate.hepatic.melt,
       mapping = aes(x = Cal_yr, y = Mortality_Rate, color = Age_Range)) +
  geom_point() +
  geom_line() +
  labs(title = "Hepatic Cancer Mortality in Japan") +
  theme_bw() +
  ## Drop the legend key background and rotate the x-axis labels
  theme(legend.key = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 1))