pacman::p_load(tidyverse, lubridate, tableone,
survival, survminer, car, svglite,missForest)
user.data_8.detect <- read.csv("/Users/abekohei/Desktop/HDS演習_DeSC/HDS_DeSC/user.data_8.欠測値補完用.csv")
colnames(user.data_8.detect)
## [1] "ID" "gender" "age"
## [4] "kencom登録年月" "資格取得年月" "資格喪失年月"
## [7] "レセプトデータ開始年月" "レセプトデータ終了年月" "baseline健診受診年月"
## [10] "weight_nz.b" "systolicBP_nz.b" "diastolicBP_nz.b"
## [13] "ldl_c_nz.b" "hdl_c_nz.b" "TG_nz.b"
## [16] "HbA1c_nz.b" "データ有り月数_total" "データ有り月数_pre"
## [19] "データ有り月数_post" "月ごとの平均データ数" "step_mean_pre"
## [22] "step_mean_post" "step_sd_pre" "step_sd_post"
## [25] "step_mean_pre.wd" "step_mean_post.wd" "step_sd_pre.wd"
## [28] "step_sd_post.wd" "step_mean_pre.we" "step_mean_post.we"
## [31] "step_sd_pre.we" "step_sd_post.we" "step.diff"
## [34] "step.diff.wd" "step.diff.we" "weight_nz.f"
## [37] "systolicBP_nz.f" "diastolicBP_nz.f" "ldl_c_nz.f"
## [40] "hdl_c_nz.f" "TG_nz.f" "HbA1c_nz.f"
## [43] "followup健診受診年月" "weight_nz.diff" "systolicBP_nz.diff"
## [46] "diastolicBP_nz.diff" "ldl_c_nz.diff" "hdl_c_nz.diff"
## [49] "TG_nz.diff" "HbA1c_nz.diff"
is.na(user.data_8.detect) %>% colSums()
## ID gender age
## 0 0 0
## kencom登録年月 資格取得年月 資格喪失年月
## 0 0 0
## レセプトデータ開始年月 レセプトデータ終了年月 baseline健診受診年月
## 0 0 0
## weight_nz.b systolicBP_nz.b diastolicBP_nz.b
## 4 4 4
## ldl_c_nz.b hdl_c_nz.b TG_nz.b
## 24 23 23
## HbA1c_nz.b データ有り月数_total データ有り月数_pre
## 175 0 0
## データ有り月数_post 月ごとの平均データ数 step_mean_pre
## 0 0 0
## step_mean_post step_sd_pre step_sd_post
## 0 0 0
## step_mean_pre.wd step_mean_post.wd step_sd_pre.wd
## 0 0 0
## step_sd_post.wd step_mean_pre.we step_mean_post.we
## 0 0 0
## step_sd_pre.we step_sd_post.we step.diff
## 0 0 0
## step.diff.wd step.diff.we weight_nz.f
## 0 0 2
## systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f
## 1 1 9
## hdl_c_nz.f TG_nz.f HbA1c_nz.f
## 8 8 104
## followup健診受診年月 weight_nz.diff systolicBP_nz.diff
## 0 6 5
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff
## 5 26 24
## TG_nz.diff HbA1c_nz.diff
## 24 203
sapply(user.data_8.detect, class)
## ID gender age
## "character" "character" "integer"
## kencom登録年月 資格取得年月 資格喪失年月
## "character" "character" "character"
## レセプトデータ開始年月 レセプトデータ終了年月 baseline健診受診年月
## "character" "character" "character"
## weight_nz.b systolicBP_nz.b diastolicBP_nz.b
## "numeric" "numeric" "numeric"
## ldl_c_nz.b hdl_c_nz.b TG_nz.b
## "numeric" "numeric" "numeric"
## HbA1c_nz.b データ有り月数_total データ有り月数_pre
## "numeric" "integer" "integer"
## データ有り月数_post 月ごとの平均データ数 step_mean_pre
## "integer" "numeric" "numeric"
## step_mean_post step_sd_pre step_sd_post
## "numeric" "numeric" "numeric"
## step_mean_pre.wd step_mean_post.wd step_sd_pre.wd
## "numeric" "numeric" "numeric"
## step_sd_post.wd step_mean_pre.we step_mean_post.we
## "numeric" "numeric" "numeric"
## step_sd_pre.we step_sd_post.we step.diff
## "numeric" "numeric" "numeric"
## step.diff.wd step.diff.we weight_nz.f
## "numeric" "numeric" "numeric"
## systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f
## "numeric" "numeric" "numeric"
## hdl_c_nz.f TG_nz.f HbA1c_nz.f
## "numeric" "numeric" "numeric"
## followup健診受診年月 weight_nz.diff systolicBP_nz.diff
## "character" "numeric" "numeric"
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff
## "numeric" "numeric" "numeric"
## TG_nz.diff HbA1c_nz.diff
## "numeric" "numeric"
#user.data_8.detect$ID<-as.factor(user.data_8.detect$ID) #IDは削除するので不要
user.data_8.detect$gender<-as.factor(user.data_8.detect$gender)
user.data_8.detect$age<-as.numeric(user.data_8.detect$age)
user.data_8.detect$データ有り月数_total<-as.numeric(user.data_8.detect$データ有り月数_total)
user.data_8.detect$データ有り月数_pre<-as.numeric(user.data_8.detect$データ有り月数_pre)
user.data_8.detect$データ有り月数_post<-as.numeric(user.data_8.detect$データ有り月数_post)
user.data_8.detect_missF <-
user.data_8.detect %>%
ungroup() %>% # グルーピングを解除
select(-ID, -kencom登録年月, -baseline健診受診年月, -followup健診受診年月,
-資格取得年月, -資格喪失年月, -レセプトデータ開始年月, -レセプトデータ終了年月)
#IDは削除したくないため、取り出しておく。missForest関数の結果と後で統合するため。
id <- user.data_8.detect$ID
sapply(user.data_8.detect_missF, class)
## gender age weight_nz.b
## "factor" "numeric" "numeric"
## systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b
## "numeric" "numeric" "numeric"
## hdl_c_nz.b TG_nz.b HbA1c_nz.b
## "numeric" "numeric" "numeric"
## データ有り月数_total データ有り月数_pre データ有り月数_post
## "numeric" "numeric" "numeric"
## 月ごとの平均データ数 step_mean_pre step_mean_post
## "numeric" "numeric" "numeric"
## step_sd_pre step_sd_post step_mean_pre.wd
## "numeric" "numeric" "numeric"
## step_mean_post.wd step_sd_pre.wd step_sd_post.wd
## "numeric" "numeric" "numeric"
## step_mean_pre.we step_mean_post.we step_sd_pre.we
## "numeric" "numeric" "numeric"
## step_sd_post.we step.diff step.diff.wd
## "numeric" "numeric" "numeric"
## step.diff.we weight_nz.f systolicBP_nz.f
## "numeric" "numeric" "numeric"
## diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f
## "numeric" "numeric" "numeric"
## TG_nz.f HbA1c_nz.f weight_nz.diff
## "numeric" "numeric" "numeric"
## systolicBP_nz.diff diastolicBP_nz.diff ldl_c_nz.diff
## "numeric" "numeric" "numeric"
## hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## "numeric" "numeric" "numeric"
imp_missForest <-
missForest(xmis = as.data.frame(user.data_8.detect_missF),
maxiter = 30,
ntree = 20
)
user.data_9_hokan <- imp_missForest$ximp
user.data_9_hokan <- cbind(ID = id, imp_missForest$ximp) #IDを統合。IDは保持できている。
user.data_9_hokan <-
user.data_9_hokan %>%
inner_join(user.data_8.detect %>% select(ID,kencom登録年月, baseline健診受診年月, followup健診受診年月,
資格取得年月, 資格喪失年月, レセプトデータ開始年月, レセプトデータ終了年月), by = "ID")
user.data_9_hokan %>%
write.csv(., "/Users/abekohei/Desktop/HDS演習_DeSC/HDS_DeSC/user.data_9.欠測値補完済.csv", row.names = FALSE)
data3_hokan <- read.csv("/Users/abekohei/Desktop/HDS演習_DeSC/HDS_DeSC/data3_exam.destructed_original_hokan.csv")
data3_hokan %>% head()
## 仮個人id 性別_sfl 年齢_sfl kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6 男性 66 2016-04-01 1
## 2 1OwsKnMNaE 男性 39 2016-04-01 3
## 3 0IliCoWCZV 男性 31 2016-04-01 3
## 4 M0tEnZHNql 女性 51 2016-04-01 2
## 5 PDnTXele4g 男性 26 2016-04-01 5
## 6 AecvtKe8cE 男性 34 2016-05-01 2
## step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1 -961.92398 -4.1390000 1.204490
## 2 61.99986 -3.1120477 5.615931
## 3 61.99986 0.7284926 -2.445457
## 4 -327.63873 4.0614129 24.935567
## 5 1581.09765 -2.6122737 10.506455
## 6 -327.63873 -0.3897352 -4.238583
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1 2.6720062 5.3590352 0.4610505 -15.515152 -0.48755945
## 2 -3.6108605 -0.3891425 1.9976718 -2.879915 -0.21745953
## 3 -2.5427406 -22.6019218 4.9481587 -2.196534 -0.06833491
## 4 1.9345428 -15.3831264 -2.2091562 -19.505292 -0.16728154
## 5 8.2586546 -18.5112023 -18.5583183 104.989170 -0.08458658
## 6 -0.4893504 -10.5642779 -11.3340042 11.296307 -0.02670434
## step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1 -623.47945 2015-04-01 2017-04-01 66.89074
## 2 -12.98046 2015-06-01 2017-04-01 90.51351
## 3 143.71995 2015-05-01 2017-05-01 71.95757
## 4 -301.81007 2015-05-01 2017-05-01 67.16708
## 5 1174.52970 2015-05-01 2017-05-01 70.95344
## 6 -576.22646 2015-06-01 2017-05-01 65.04586
## systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b TG_nz.b HbA1c_nz.b
## 1 109.9507 71.62864 108.8772 44.50094 76.35546 5.631184
## 2 119.0751 87.45228 117.3376 45.69706 108.15197 6.440423
## 3 133.8459 78.75107 118.8808 58.35234 67.63990 5.337701
## 4 112.6972 80.13369 124.2131 54.28470 132.65872 5.760370
## 5 101.7182 56.37320 155.5491 63.88320 72.98456 5.292819
## 6 111.6170 68.20776 136.7508 71.41820 78.06746 5.156067
## weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f TG_nz.f
## 1 62.75174 111.1552 74.30065 114.23626 44.96199 60.84030
## 2 87.40146 124.6911 83.84142 116.94841 47.69473 105.27205
## 3 72.68606 131.4004 76.20833 96.27893 63.30050 65.44337
## 4 71.22849 137.6327 82.06824 108.82998 52.07554 113.15343
## 5 68.34116 112.2247 64.63186 137.03787 45.32488 177.97374
## 6 64.65613 107.3784 67.71841 126.18648 60.08419 89.36376
## HbA1c_nz.f
## 1 5.143625
## 2 6.222963
## 3 5.269366
## 4 5.593088
## 5 5.208233
## 6 5.129363
data3_hokan %>% skimr::skim()
| Name | Piped data |
| Number of rows | 5680 |
| Number of columns | 30 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| numeric | 25 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| 仮個人id | 0 | 1 | 10 | 10 | 0 | 5680 | 0 |
| 性別_sfl | 0 | 1 | 2 | 2 | 0 | 2 | 0 |
| kencom登録年月 | 0 | 1 | 10 | 10 | 0 | 24 | 0 |
| baseline健診受診年月 | 0 | 1 | 10 | 10 | 0 | 32 | 0 |
| followup健診受診年月 | 0 | 1 | 10 | 10 | 0 | 24 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| 年齢_sfl | 0 | 1 | 44.85 | 11.08 | 18.00 | 37.00 | 46.00 | 53.00 | 73.00 | ▂▅▇▆▁ |
| step.diff.quintile | 0 | 1 | 2.99 | 1.42 | 1.00 | 2.00 | 3.00 | 4.00 | 5.00 | ▇▇▇▇▇ |
| step.diff.median.by.group | 0 | 1 | 167.23 | 853.89 | -961.92 | -327.64 | 62.00 | 510.03 | 1581.10 | ▅▃▇▁▃ |
| weight_nz.diff | 0 | 1 | 0.37 | 4.09 | -31.13 | -2.05 | 0.44 | 2.92 | 19.59 | ▁▁▅▇▁ |
| systolicBP_nz.diff | 0 | 1 | 1.04 | 13.35 | -59.82 | -7.58 | 1.04 | 9.51 | 52.65 | ▁▁▇▃▁ |
| diastolicBP_nz.diff | 0 | 1 | 1.02 | 9.60 | -37.00 | -5.21 | 0.99 | 7.20 | 45.97 | ▁▃▇▂▁ |
| ldl_c_nz.diff | 0 | 1 | 1.47 | 22.48 | -143.84 | -10.55 | 1.73 | 14.31 | 139.08 | ▁▁▇▁▁ |
| hdl_c_nz.diff | 0 | 1 | 1.12 | 9.21 | -43.70 | -4.56 | 0.83 | 6.40 | 52.75 | ▁▂▇▁▁ |
| TG_nz.diff | 0 | 1 | -0.01 | 72.32 | -1279.42 | -18.80 | 0.15 | 19.67 | 1146.49 | ▁▁▇▁▁ |
| HbA1c_nz.diff | 0 | 1 | 0.04 | 0.37 | -4.51 | -0.17 | 0.04 | 0.24 | 5.93 | ▁▁▇▁▁ |
| step_mean.diff | 0 | 1 | 209.38 | 1301.86 | -6205.96 | -442.92 | 58.09 | 660.11 | 12842.29 | ▁▇▁▁▁ |
| weight_nz.b | 0 | 1 | 64.08 | 12.36 | 32.24 | 54.99 | 63.62 | 71.77 | 139.68 | ▃▇▂▁▁ |
| systolicBP_nz.b | 0 | 1 | 117.88 | 15.13 | 75.98 | 107.22 | 117.10 | 127.38 | 209.34 | ▂▇▂▁▁ |
| diastolicBP_nz.b | 0 | 1 | 73.18 | 11.51 | 41.28 | 64.99 | 72.28 | 80.81 | 126.16 | ▂▇▆▁▁ |
| ldl_c_nz.b | 0 | 1 | 120.58 | 30.26 | 28.98 | 99.26 | 118.20 | 139.74 | 276.22 | ▁▇▅▁▁ |
| hdl_c_nz.b | 0 | 1 | 63.26 | 16.04 | 24.59 | 51.51 | 61.31 | 72.91 | 140.46 | ▂▇▃▁▁ |
| TG_nz.b | 0 | 1 | 102.81 | 82.10 | 20.62 | 57.46 | 81.49 | 119.74 | 1450.27 | ▇▁▁▁▁ |
| HbA1c_nz.b | 0 | 1 | 5.42 | 0.47 | 4.09 | 5.15 | 5.36 | 5.61 | 12.17 | ▇▂▁▁▁ |
| weight_nz.f | 0 | 1 | 64.45 | 12.35 | 31.64 | 55.43 | 64.10 | 71.80 | 141.43 | ▂▇▂▁▁ |
| systolicBP_nz.f | 0 | 1 | 118.92 | 15.38 | 77.58 | 108.04 | 118.40 | 128.88 | 204.01 | ▂▇▃▁▁ |
| diastolicBP_nz.f | 0 | 1 | 74.20 | 11.43 | 41.49 | 66.21 | 73.56 | 81.76 | 121.98 | ▁▇▇▂▁ |
| ldl_c_nz.f | 0 | 1 | 122.04 | 30.77 | 28.81 | 100.75 | 119.66 | 141.05 | 279.04 | ▁▇▅▁▁ |
| hdl_c_nz.f | 0 | 1 | 64.38 | 16.54 | 25.77 | 52.36 | 62.18 | 74.19 | 151.11 | ▃▇▃▁▁ |
| TG_nz.f | 0 | 1 | 102.80 | 81.76 | 19.96 | 57.61 | 81.43 | 122.58 | 1544.59 | ▇▁▁▁▁ |
| HbA1c_nz.f | 0 | 1 | 5.46 | 0.50 | 4.05 | 5.18 | 5.40 | 5.65 | 12.56 | ▇▂▁▁▁ |
colnames(data3_hokan)
## [1] "仮個人id" "性別_sfl"
## [3] "年齢_sfl" "kencom登録年月"
## [5] "step.diff.quintile" "step.diff.median.by.group"
## [7] "weight_nz.diff" "systolicBP_nz.diff"
## [9] "diastolicBP_nz.diff" "ldl_c_nz.diff"
## [11] "hdl_c_nz.diff" "TG_nz.diff"
## [13] "HbA1c_nz.diff" "step_mean.diff"
## [15] "baseline健診受診年月" "followup健診受診年月"
## [17] "weight_nz.b" "systolicBP_nz.b"
## [19] "diastolicBP_nz.b" "ldl_c_nz.b"
## [21] "hdl_c_nz.b" "TG_nz.b"
## [23] "HbA1c_nz.b" "weight_nz.f"
## [25] "systolicBP_nz.f" "diastolicBP_nz.f"
## [27] "ldl_c_nz.f" "hdl_c_nz.f"
## [29] "TG_nz.f" "HbA1c_nz.f"
data3_hokan <- data3_hokan %>%
rename(gender = 性別_sfl, age = 年齢_sfl) %>%
mutate(gender = if_else(gender == "男性", "male", "female"))
#data3の確認
ggplot(data3_hokan,mapping = aes(x=step.diff.quintile,fill=gender))+
geom_bar()
hist(data3_hokan$age)
ggplot(data3_hokan, aes(x=age, fill=gender)) +
geom_histogram(binwidth=1, position="identity",alpha=0.5) +
scale_fill_brewer(palette="Set1") +
labs(title="age", x="age", y="count") +
theme_minimal()
hist(data3_hokan$step_mean.diff)
ggplot(data3_hokan,mapping = aes(x=weight_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3_hokan,mapping = aes(x=systolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3_hokan,mapping = aes(x=diastolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3_hokan,mapping = aes(x=ldl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3_hokan,mapping = aes(x=hdl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3_hokan,mapping = aes(x=TG_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3_hokan,mapping = aes(x=HbA1c_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=weight_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=systolicBP_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=diastolicBP_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=ldl_c_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=hdl_c_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=TG_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=HbA1c_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
data3_hokan_longer<- data3_hokan %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "value"
)
ggplot(data3_hokan_longer, aes(x = step_mean.diff, y = value, color = gender)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~measure_type, scales = "free") +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
data3_hokan %>% group_by(gender,"HbA1c_nz.diff") %>%
summarise(mean_Hb=mean(HbA1c_nz.diff)) %>%
ggplot(mapping = aes(x=gender,y=mean_Hb,fill=gender))+
geom_bar(stat="identity",position="dodge")+
xlab("Gender") +ylab("HbA1c_diff")+
theme_bw()
## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.
t.test(HbA1c_nz.diff~gender,data=data3_hokan)
##
## Welch Two Sample t-test
##
## data: HbA1c_nz.diff by gender
## t = -0.50046, df = 5205.3, p-value = 0.6168
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -0.02415687 0.01433144
## sample estimates:
## mean in group female mean in group male
## 0.03511848 0.04003119
#ヒストグラム作成
#step.diff.quintileごとの平均年齢
data3_hokan %>%
group_by(step.diff.quintile) %>%
summarise(mean_age=mean(age,na.rm=T)) %>%
ggplot(mapping = aes(x=step.diff.quintile,y=mean_age))+
geom_bar(stat="identity")+
xlab("Step Difference Quintile") +
ylab("Mean Age")+
theme_bw()
#step.diff.quintileごとにおけるベースライン時と健診時の体重差
#step.diff.quintileごとにおけるベースライン時と健診時の体重差
data3_hokan %>%
group_by(step.diff.quintile) %>%
summarise(mean_weight=mean(weight_nz.diff,na.rm=T),
sd_weight=sd(weight_nz.diff,na.rm=T),
se_weight=sd_weight / sqrt(n())) %>%
ggplot(mapping = aes(x=step.diff.quintile,y=mean_weight))+
geom_errorbar(aes(ymin=mean_weight-se_weight,ymax=mean_weight+se_weight),width=0.3)+
geom_bar(stat="identity")+
xlab("Step Difference Quintile") +
ylab("Changing Weight(Mean Weight)")+
theme_bw()
data3_hokan %>%
group_by(step.diff.quintile, gender) %>%
summarise(mean_weight=mean(weight_nz.diff, na.rm=TRUE),
sd_weight=sd(weight_nz.diff, na.rm=TRUE),
se_weight=sd_weight / sqrt(n())) %>%
ggplot(mapping = aes(x=step.diff.quintile, y=mean_weight, fill=gender)) +
geom_errorbar(aes(ymin=mean_weight-se_weight, ymax=mean_weight+se_weight), width=0.3,
position=position_dodge(width=0.5)) +
geom_bar(stat="identity", position=position_dodge(width=0.5)) +
xlab("Step Difference Quintile") +
ylab("Changing Weight (Mean Weight)") +
theme_bw() +
scale_fill_manual(values = c("male" = "blue", "female" = "pink")) # ここで性別ごとの色を指定
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.
data3_hokan %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.quintile, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
geom_bar(stat = "identity",position = "dodge") +
geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
facet_wrap(~measure_type,scales = "free")+
theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.
健診検査値変化量 = Β x step.diff.median.by.group+切片(intercept)
data3_hokan %>% head()
## 仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6 male 66 2016-04-01 1
## 2 1OwsKnMNaE male 39 2016-04-01 3
## 3 0IliCoWCZV male 31 2016-04-01 3
## 4 M0tEnZHNql female 51 2016-04-01 2
## 5 PDnTXele4g male 26 2016-04-01 5
## 6 AecvtKe8cE male 34 2016-05-01 2
## step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1 -961.92398 -4.1390000 1.204490
## 2 61.99986 -3.1120477 5.615931
## 3 61.99986 0.7284926 -2.445457
## 4 -327.63873 4.0614129 24.935567
## 5 1581.09765 -2.6122737 10.506455
## 6 -327.63873 -0.3897352 -4.238583
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1 2.6720062 5.3590352 0.4610505 -15.515152 -0.48755945
## 2 -3.6108605 -0.3891425 1.9976718 -2.879915 -0.21745953
## 3 -2.5427406 -22.6019218 4.9481587 -2.196534 -0.06833491
## 4 1.9345428 -15.3831264 -2.2091562 -19.505292 -0.16728154
## 5 8.2586546 -18.5112023 -18.5583183 104.989170 -0.08458658
## 6 -0.4893504 -10.5642779 -11.3340042 11.296307 -0.02670434
## step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1 -623.47945 2015-04-01 2017-04-01 66.89074
## 2 -12.98046 2015-06-01 2017-04-01 90.51351
## 3 143.71995 2015-05-01 2017-05-01 71.95757
## 4 -301.81007 2015-05-01 2017-05-01 67.16708
## 5 1174.52970 2015-05-01 2017-05-01 70.95344
## 6 -576.22646 2015-06-01 2017-05-01 65.04586
## systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b TG_nz.b HbA1c_nz.b
## 1 109.9507 71.62864 108.8772 44.50094 76.35546 5.631184
## 2 119.0751 87.45228 117.3376 45.69706 108.15197 6.440423
## 3 133.8459 78.75107 118.8808 58.35234 67.63990 5.337701
## 4 112.6972 80.13369 124.2131 54.28470 132.65872 5.760370
## 5 101.7182 56.37320 155.5491 63.88320 72.98456 5.292819
## 6 111.6170 68.20776 136.7508 71.41820 78.06746 5.156067
## weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f TG_nz.f
## 1 62.75174 111.1552 74.30065 114.23626 44.96199 60.84030
## 2 87.40146 124.6911 83.84142 116.94841 47.69473 105.27205
## 3 72.68606 131.4004 76.20833 96.27893 63.30050 65.44337
## 4 71.22849 137.6327 82.06824 108.82998 52.07554 113.15343
## 5 68.34116 112.2247 64.63186 137.03787 45.32488 177.97374
## 6 64.65613 107.3784 67.71841 126.18648 60.08419 89.36376
## HbA1c_nz.f
## 1 5.143625
## 2 6.222963
## 3 5.269366
## 4 5.593088
## 5 5.208233
## 6 5.129363
unique(data3_hokan$step.diff.median.by.group)
## [1] -961.92398 61.99986 -327.63873 1581.09765 510.03305
n_distinct(data3_hokan$step.diff.median.by.group)
## [1] 5
summary(data3_hokan$step.diff.median.by.group)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -961.9 -327.6 62.0 167.2 510.0 1581.1
str(data3_hokan$step.diff.median.by.group)
## num [1:5680] -962 62 62 -328 1581 ...
data4_hokan<-data3_hokan %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.median.by.group, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n()))
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
data4_hokan %>% head()
## # A tibble: 6 × 5
## # Groups: step.diff.median.by.group [1]
## step.diff.median.by.group measure_type mean sd se
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 -962. HbA1c_nz.diff 0.0492 0.417 0.0123
## 2 -962. TG_nz.diff 3.39 82.1 2.42
## 3 -962. diastolicBP_nz.diff 1.35 9.08 0.268
## 4 -962. hdl_c_nz.diff 0.156 9.32 0.275
## 5 -962. ldl_c_nz.diff 3.03 23.4 0.688
## 6 -962. systolicBP_nz.diff 1.69 12.7 0.375
#dBP
DBP_hokan<-
data4_hokan %>%
filter(measure_type=="diastolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_hokan<-
data4_hokan %>%
filter(measure_type=="systolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="sBP_diff",.before=Estimate)
#weight
weight_hokan<-
data4_hokan %>%
filter(measure_type=="weight_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_hokan<-
data4_hokan %>%
filter(measure_type=="HbA1c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_hokan<-
data4_hokan %>%
filter(measure_type=="TG_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_hokan<-
data4_hokan %>%
filter(measure_type=="hdl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_hokan<-
data4_hokan %>%
filter(measure_type=="ldl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="LDL_diff",.before=Estimate)
data5_hokan<-bind_rows(DBP_hokan,SBP_hokan,weight_hokan,HbA1c_hokan,TG_hokan,HDL_hokan,LDL_hokan)
data5_hokan %>%
mutate("p<0.05"=if_else(p_value<0.05,"*",""))
## term Estimate p_value p<0.05
## step.diff.median.by.group...1 dBP_diff -2.851582e-04 0.065543559
## step.diff.median.by.group...2 sBP_diff -4.436297e-04 0.007693330 *
## step.diff.median.by.group...3 weight_diff -3.547857e-04 0.003060491 *
## step.diff.median.by.group...4 HbA1c_diff -1.516801e-05 0.023672830 *
## step.diff.median.by.group...5 TG_diff -2.229034e-03 0.319833692
## step.diff.median.by.group...6 HDL_diff 8.257078e-04 0.007198414 *
## step.diff.median.by.group...7 LDL_diff -6.019312e-04 0.249781215