This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
pacman::p_load(tidyverse,readxl,ggplot2,dplyr,tableone,ggpubr,ggrepel,DBI,skimr,dbplyr,pryr,lubridate, survival, survminer,car,svglite,MatchIt, cobalt)
#データ読み込み
data1<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data1_step.destructed.csv")
data2<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data2_monthly_step.destructed.csv")
data3<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data3_exam.destructed.csv")
data1 %>% skimr::skim()
| Name | Piped data |
| Number of rows | 12602 |
| Number of columns | 19 |
| _______________________ | |
| Column type frequency: | |
| character | 4 |
| numeric | 15 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| 仮個人id | 0 | 1 | 10 | 10 | 0 | 12602 | 0 |
| 性別_sfl | 0 | 1 | 2 | 2 | 0 | 2 | 0 |
| kencom登録年月 | 0 | 1 | 10 | 10 | 0 | 27 | 0 |
| baseline健診受診年月 | 0 | 1 | 10 | 10 | 0 | 38 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| 年齢_sfl | 0 | 1.00 | 44.72 | 11.04 | 18.00 | 37.00 | 46.00 | 53.00 | 73.00 | ▂▅▇▆▁ |
| step_mean.diff | 0 | 1.00 | 509.85 | 1594.22 | -6205.96 | -307.56 | 200.83 | 979.56 | 24369.72 | ▅▇▁▁▁ |
| step_mean.b | 0 | 1.00 | 5641.75 | 2685.98 | 25.30 | 3386.36 | 5544.94 | 7509.58 | 20954.08 | ▆▇▂▁▁ |
| step_mean.f | 0 | 1.00 | 6151.60 | 2723.39 | 12.77 | 4085.90 | 6017.96 | 7872.06 | 30572.20 | ▇▇▁▁▁ |
| step_sd.b | 0 | 1.00 | 3089.95 | 1172.33 | 46.16 | 2245.54 | 2999.51 | 3770.87 | 7677.58 | ▂▇▆▂▁ |
| step_sd.f | 0 | 1.00 | 3420.28 | 1412.43 | 47.54 | 2547.88 | 3219.49 | 4026.36 | 36661.56 | ▇▁▁▁▁ |
| step_mean.diff.wd | 0 | 1.00 | 533.68 | 1684.35 | -7282.11 | -305.09 | 220.83 | 1038.35 | 23533.30 | ▁▇▁▁▁ |
| step_mean.diff.we | 0 | 1.00 | 462.63 | 1748.29 | -9706.07 | -439.25 | 198.55 | 1016.13 | 26459.46 | ▁▇▁▁▁ |
| weight_nz.b | 4 | 1.00 | 62.87 | 12.65 | 31.87 | 53.18 | 62.05 | 70.81 | 139.68 | ▃▇▂▁▁ |
| systolicBP_nz.b | 5 | 1.00 | 118.19 | 15.92 | 75.16 | 106.95 | 117.17 | 127.86 | 209.34 | ▂▇▃▁▁ |
| diastolicBP_nz.b | 5 | 1.00 | 73.24 | 11.82 | 38.80 | 64.90 | 72.40 | 80.78 | 128.57 | ▁▇▆▁▁ |
| ldl_c_nz.b | 35 | 1.00 | 121.50 | 31.45 | 28.53 | 99.48 | 119.03 | 141.13 | 283.21 | ▁▇▅▁▁ |
| hdl_c_nz.b | 33 | 1.00 | 64.82 | 16.41 | 24.59 | 52.76 | 63.10 | 74.91 | 152.03 | ▂▇▃▁▁ |
| TG_nz.b | 33 | 1.00 | 100.59 | 78.44 | 20.16 | 56.83 | 80.02 | 118.76 | 1588.09 | ▇▁▁▁▁ |
| HbA1c_nz.b | 330 | 0.97 | 5.44 | 0.54 | 4.02 | 5.15 | 5.37 | 5.62 | 13.12 | ▇▁▁▁▁ |
# 平均と標準偏差を計算
mean_data <- mean(data1$step_mean.diff)
sd_data <- sd(data1$step_mean.diff)
se_data <- sd_data / sqrt(length(data1$step_mean.diff))
mean_data
## [1] 509.8529
sd_data
## [1] 1594.224
se_data
## [1] 14.20135
#年齢
ggplot(data1,mapping = aes(x=年齢_sfl))+
geom_histogram(bins = 50)+
theme_bw()
#体重
ggplot(data1,mapping = aes(x=weight_nz.b))+
geom_histogram(bins = 100)+
theme_bw()
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).
#歩数変化量
ggplot(data1,mapping = aes(x=step_mean.diff))+
geom_histogram(bins = 100)+
#xlim(-5000,5000)+
theme_bw()
# Kolmogorov Smirnovテストの実行
#ks.test(data1, "pnorm", mean = mean_data, sd = sd_data)
正規分布ではないのでウィルコクソン符号順位検定の実行
#wilcox.test(data1$step_mean.diff, mu = 0, paired = FALSE)
data1$step_mean.diff %>% t.test()
##
## One Sample t-test
##
## data: .
## t = 35.902, df = 12601, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 482.0161 537.6897
## sample estimates:
## mean of x
## 509.8529
data2 %>% skimr::skim()
| Name | Piped data |
| Number of rows | 302448 |
| Number of columns | 10 |
| _______________________ | |
| Column type frequency: | |
| character | 4 |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| 仮個人id | 0 | 1 | 10 | 10 | 0 | 12602 | 0 |
| 性別_sfl | 0 | 1 | 2 | 2 | 0 | 2 | 0 |
| kencom登録年月 | 0 | 1 | 10 | 10 | 0 | 27 | 0 |
| 年月 | 0 | 1 | 10 | 10 | 0 | 50 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| 年齢_sfl | 0 | 1 | 44.72 | 11.04 | 18 | 37.00 | 46.00 | 53.00 | 73.00 | ▂▅▇▆▁ |
| 登録からの月数 | 0 | 1 | -0.50 | 6.92 | -12 | -6.25 | -0.50 | 5.25 | 11.00 | ▇▇▆▇▇ |
| step_mean | 0 | 1 | 5893.71 | 2976.58 | 2 | 3573.42 | 5695.32 | 7752.38 | 84714.43 | ▇▁▁▁▁ |
| step_sd | 0 | 1 | 2955.37 | 1477.54 | 0 | 1978.24 | 2771.46 | 3671.65 | 47763.00 | ▇▁▁▁▁ |
| step_mean.wd | 51 | 1 | 6210.87 | 3225.61 | 2 | 3631.09 | 6026.95 | 8276.27 | 84697.91 | ▇▁▁▁▁ |
| step_mean.we | 275 | 1 | 5085.80 | 3278.90 | 2 | 2735.50 | 4435.88 | 6703.11 | 93572.88 | ▇▁▁▁▁ |
#ヒストグラム作成
ggplot(data2,mapping = aes(x=step_mean))+
geom_histogram(bins = 100)+
theme_bw()
## 登録月と歩数の関係
se_data2 <- sd(data2$step_mean) / sqrt(length(data2$step_mean))
data2 %>% group_by(登録からの月数) %>%
summarise(mean_step=mean(step_mean),
sd_step=sd(step_mean),
se_step=sd_step / sqrt(n())) %>%
ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
geom_line()+
geom_point()+
geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
geom_vline(xintercept = 0,linetype="dotted")+
xlab("registration_month")+ylab("mean_step")+
theme_bw()
#週末と平日における歩数の違い(wd=平日、we=週末)
#平日
data2 %>% filter(!is.na(step_mean.wd)) %>%
group_by(登録からの月数) %>%
summarise(mean_step=mean(step_mean.wd),
sd_step=sd(step_mean.wd),
se_step=sd_step / sqrt(n())) %>%
ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
geom_line()+
geom_point()+
geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
geom_vline(xintercept = 0,linetype="dotted")+
xlab("registration_month")+ylab("mean_step")+
theme_bw()
#週末
data2 %>% filter(!is.na(step_mean.we)) %>%
group_by(登録からの月数) %>%
summarise(mean_step=mean(step_mean.we),
sd_step=sd(step_mean.we),
se_step=sd_step / sqrt(n())) %>%
ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
geom_line()+
geom_point()+
geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
geom_vline(xintercept = 0,linetype="dotted")+
xlab("registration_month")+ylab("mean_step")+
theme_bw()
#平日と週末の歩数について
data2_we <- data2 %>%
pivot_longer(
cols = c(step_mean.wd, step_mean.we),
names_to = "day_type",
values_to = "mean_step"
) %>%
mutate(day_type = if_else(day_type == "step_mean.we", "Weekend", "Weekday")) %>%
distinct(仮個人id,登録からの月数,day_type,.keep_all = T) %>%
group_by(登録からの月数, day_type) %>%
summarise(
mean_step1 = mean(mean_step, na.rm = TRUE),
sd_step = sd(mean_step, na.rm = TRUE),
se_step = sd_step / sqrt(n()))
## `summarise()` has grouped output by '登録からの月数'. You can override using
## the `.groups` argument.
data2_we
## # A tibble: 48 × 5
## # Groups: 登録からの月数 [24]
## 登録からの月数 day_type mean_step1 sd_step se_step
## <int> <chr> <dbl> <dbl> <dbl>
## 1 -12 Weekday 5992. 3191. 28.4
## 2 -12 Weekend 4953. 3090. 27.5
## 3 -11 Weekday 5930. 3065. 27.3
## 4 -11 Weekend 4848. 3062. 27.3
## 5 -10 Weekday 5938. 3106. 27.7
## 6 -10 Weekend 4822. 3013. 26.8
## 7 -9 Weekday 5925. 3118. 27.8
## 8 -9 Weekend 4823. 3010. 26.8
## 9 -8 Weekday 5901. 3102. 27.6
## 10 -8 Weekend 4823. 3081. 27.4
## # ℹ 38 more rows
ggplot(data2_we, aes(x = 登録からの月数, y = mean_step1, color = day_type)) +
geom_line() +
geom_point() +
geom_errorbar(aes(ymin = mean_step1 - se_step, ymax = mean_step1 + se_step)) +
geom_vline(xintercept = 0, linetype = "dotted") +
xlab("registration_month") + ylab("mean_step") +
theme_bw() +
scale_color_manual(values = c("Weekend" = "blue", "Weekday" = "red"))
data3 %>% head()
## 仮個人id 性別_sfl 年齢_sfl kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6 男性 66 2016-04-01 1
## 2 1OwsKnMNaE 男性 39 2016-04-01 3
## 3 0IliCoWCZV 男性 31 2016-04-01 3
## 4 M0tEnZHNql 女性 51 2016-04-01 2
## 5 PDnTXele4g 男性 26 2016-04-01 5
## 6 AecvtKe8cE 男性 34 2016-05-01 2
## step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1 -963.51562 -4.1390000 1.204490
## 2 62.69344 -3.1120477 5.615931
## 3 62.69344 0.7284926 -2.445457
## 4 -327.36152 4.0614129 24.935567
## 5 1596.78921 -2.6122737 10.506455
## 6 -327.36152 -0.3897352 -4.238583
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1 2.6720062 5.3590352 0.4610505 -15.515152 -0.48755945
## 2 -3.6108605 -0.3891425 1.9976718 -2.879915 -0.21745953
## 3 -2.5427406 -22.6019218 4.9481587 -2.196534 -0.06833491
## 4 1.9345428 -15.3831264 -2.2091562 -19.505292 -0.16728154
## 5 8.2586546 -18.5112023 -18.5583183 104.989170 -0.08458658
## 6 -0.4893504 -10.5642779 -11.3340042 11.296307 -0.02670434
## step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1 -623.47945 2015-04-01 2017-04-01 66.89074
## 2 -12.98046 2015-06-01 2017-04-01 90.51351
## 3 143.71995 2015-05-01 2017-05-01 71.95757
## 4 -301.81007 2015-05-01 2017-05-01 67.16708
## 5 1174.52970 2015-05-01 2017-05-01 70.95344
## 6 -576.22646 2015-06-01 2017-05-01 65.04586
## systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b TG_nz.b HbA1c_nz.b
## 1 109.9507 71.62864 108.8772 44.50094 76.35546 5.631184
## 2 119.0751 87.45228 117.3376 45.69706 108.15197 6.440423
## 3 133.8459 78.75107 118.8808 58.35234 67.63990 5.337701
## 4 112.6972 80.13369 124.2131 54.28470 132.65872 5.760370
## 5 101.7182 56.37320 155.5491 63.88320 72.98456 5.292819
## 6 111.6170 68.20776 136.7508 71.41820 78.06746 5.156067
## weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f TG_nz.f
## 1 62.75174 111.1552 74.30065 114.23626 44.96199 60.84030
## 2 87.40146 124.6911 83.84142 116.94841 47.69473 105.27205
## 3 72.68606 131.4004 76.20833 96.27893 63.30050 65.44337
## 4 71.22849 137.6327 82.06824 108.82998 52.07554 113.15343
## 5 68.34116 112.2247 64.63186 137.03787 45.32488 177.97374
## 6 64.65613 107.3784 67.71841 126.18648 60.08419 89.36376
## HbA1c_nz.f
## 1 5.143625
## 2 6.222963
## 3 5.269366
## 4 5.593088
## 5 5.208233
## 6 5.129363
data3 <- data3 %>%
rename(gender = 性別_sfl, age = 年齢_sfl) %>%
mutate(gender = if_else(gender == "男性", "male", "female"))
data3 %>% skimr::skim()
| Name | Piped data |
| Number of rows | 5473 |
| Number of columns | 30 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| numeric | 25 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| 仮個人id | 0 | 1 | 10 | 10 | 0 | 5473 | 0 |
| gender | 0 | 1 | 4 | 6 | 0 | 2 | 0 |
| kencom登録年月 | 0 | 1 | 10 | 10 | 0 | 24 | 0 |
| baseline健診受診年月 | 0 | 1 | 10 | 10 | 0 | 32 | 0 |
| followup健診受診年月 | 0 | 1 | 10 | 10 | 0 | 24 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 44.86 | 11.10 | 18.00 | 37.00 | 46.00 | 53.00 | 73.00 | ▂▅▇▆▁ |
| step.diff.quintile | 0 | 1 | 3.00 | 1.41 | 1.00 | 2.00 | 3.00 | 4.00 | 5.00 | ▇▇▇▇▇ |
| step.diff.median.by.group | 0 | 1 | 177.03 | 860.34 | -963.52 | -327.36 | 62.69 | 516.39 | 1596.79 | ▅▃▇▁▅ |
| weight_nz.diff | 0 | 1 | 0.36 | 4.11 | -31.13 | -2.06 | 0.44 | 2.92 | 19.59 | ▁▁▅▇▁ |
| systolicBP_nz.diff | 0 | 1 | 1.09 | 13.39 | -59.82 | -7.55 | 1.11 | 9.60 | 52.65 | ▁▂▇▃▁ |
| diastolicBP_nz.diff | 0 | 1 | 1.01 | 9.59 | -37.00 | -5.21 | 0.96 | 7.19 | 45.97 | ▁▃▇▂▁ |
| ldl_c_nz.diff | 0 | 1 | 1.44 | 22.55 | -143.84 | -10.60 | 1.72 | 14.37 | 108.90 | ▁▁▇▅▁ |
| hdl_c_nz.diff | 0 | 1 | 1.13 | 9.27 | -43.70 | -4.57 | 0.84 | 6.42 | 52.75 | ▁▂▇▁▁ |
| TG_nz.diff | 0 | 1 | -0.25 | 73.23 | -1279.42 | -19.13 | 0.07 | 19.70 | 1146.49 | ▁▁▇▁▁ |
| HbA1c_nz.diff | 0 | 1 | 0.04 | 0.37 | -4.51 | -0.18 | 0.04 | 0.25 | 5.93 | ▁▁▇▁▁ |
| step_mean.diff | 0 | 1 | 220.59 | 1308.52 | -6205.96 | -437.21 | 62.69 | 668.41 | 12842.29 | ▁▇▁▁▁ |
| weight_nz.b | 0 | 1 | 64.09 | 12.38 | 32.24 | 54.99 | 63.60 | 71.81 | 139.68 | ▃▇▂▁▁ |
| systolicBP_nz.b | 0 | 1 | 117.95 | 15.14 | 75.98 | 107.29 | 117.14 | 127.39 | 209.34 | ▂▇▂▁▁ |
| diastolicBP_nz.b | 0 | 1 | 73.31 | 11.45 | 41.28 | 65.08 | 72.42 | 80.89 | 126.16 | ▁▇▆▁▁ |
| ldl_c_nz.b | 0 | 1 | 120.87 | 30.34 | 28.98 | 99.45 | 118.56 | 140.01 | 276.22 | ▁▇▅▁▁ |
| hdl_c_nz.b | 0 | 1 | 63.33 | 16.13 | 24.59 | 51.51 | 61.31 | 73.07 | 140.46 | ▂▇▃▁▁ |
| TG_nz.b | 0 | 1 | 103.59 | 82.98 | 20.62 | 57.83 | 82.07 | 120.17 | 1450.27 | ▇▁▁▁▁ |
| HbA1c_nz.b | 0 | 1 | 5.43 | 0.47 | 4.09 | 5.15 | 5.37 | 5.62 | 12.17 | ▇▂▁▁▁ |
| weight_nz.f | 0 | 1 | 64.45 | 12.36 | 31.64 | 55.44 | 64.08 | 71.84 | 141.43 | ▂▇▂▁▁ |
| systolicBP_nz.f | 0 | 1 | 119.05 | 15.35 | 77.58 | 108.21 | 118.54 | 129.03 | 204.01 | ▂▇▃▁▁ |
| diastolicBP_nz.f | 0 | 1 | 74.32 | 11.39 | 41.49 | 66.35 | 73.70 | 81.85 | 119.35 | ▁▇▇▂▁ |
| ldl_c_nz.f | 0 | 1 | 122.32 | 30.75 | 28.81 | 101.02 | 119.97 | 141.47 | 279.04 | ▁▇▅▁▁ |
| hdl_c_nz.f | 0 | 1 | 64.46 | 16.63 | 25.77 | 52.39 | 62.23 | 74.44 | 151.11 | ▃▇▃▁▁ |
| TG_nz.f | 0 | 1 | 103.34 | 82.45 | 19.96 | 57.97 | 81.88 | 123.17 | 1544.59 | ▇▁▁▁▁ |
| HbA1c_nz.f | 0 | 1 | 5.46 | 0.49 | 4.05 | 5.18 | 5.41 | 5.65 | 12.56 | ▇▂▁▁▁ |
colnames(data3)
## [1] "仮個人id" "gender"
## [3] "age" "kencom登録年月"
## [5] "step.diff.quintile" "step.diff.median.by.group"
## [7] "weight_nz.diff" "systolicBP_nz.diff"
## [9] "diastolicBP_nz.diff" "ldl_c_nz.diff"
## [11] "hdl_c_nz.diff" "TG_nz.diff"
## [13] "HbA1c_nz.diff" "step_mean.diff"
## [15] "baseline健診受診年月" "followup健診受診年月"
## [17] "weight_nz.b" "systolicBP_nz.b"
## [19] "diastolicBP_nz.b" "ldl_c_nz.b"
## [21] "hdl_c_nz.b" "TG_nz.b"
## [23] "HbA1c_nz.b" "weight_nz.f"
## [25] "systolicBP_nz.f" "diastolicBP_nz.f"
## [27] "ldl_c_nz.f" "hdl_c_nz.f"
## [29] "TG_nz.f" "HbA1c_nz.f"
#data3の確認
ggplot(data3,mapping = aes(x=step.diff.quintile,fill=gender))+
geom_bar()
hist(data3$age)
ggplot(data3, aes(x=age, fill=gender)) +
geom_histogram(binwidth=1, position="identity",alpha=0.5) +
scale_fill_brewer(palette="Set1") +
labs(title="age", x="age", y="count") +
theme_minimal()
hist(data3$step_mean.diff)
ggplot(data3,mapping = aes(x=weight_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3,mapping = aes(x=systolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3,mapping = aes(x=diastolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3,mapping = aes(x=ldl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3,mapping = aes(x=hdl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3,mapping = aes(x=TG_nz.diff,fill=gender))+
geom_histogram(bins = 100)
ggplot(data3,mapping = aes(x=HbA1c_nz.diff,fill=gender))+
geom_histogram(bins = 100)
step_mean.diff=(step_mean.f - step_mean.b) HbA1c_nz.diff=(HbA1c_nz.f - HbA1c_nz.b)
ggplot(data3,mapping = aes(x=step_mean.diff,y=weight_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3,mapping = aes(x=step_mean.diff,y=systolicBP_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3,mapping = aes(x=step_mean.diff,y=diastolicBP_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3,mapping = aes(x=step_mean.diff,y=ldl_c_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3,mapping = aes(x=step_mean.diff,y=hdl_c_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3,mapping = aes(x=step_mean.diff,y=TG_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data3,mapping = aes(x=step_mean.diff,y=HbA1c_nz.diff,color=gender))+
geom_point()+
geom_smooth(method = "lm",se=F)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
data3_longer<- data3 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "value"
)
ggplot(data3_longer, aes(x = step_mean.diff, y = value, color = gender)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~measure_type, scales = "free") +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
data3 %>% group_by(gender,"HbA1c_nz.diff") %>%
summarise(mean_Hb=mean(HbA1c_nz.diff)) %>%
ggplot(mapping = aes(x=gender,y=mean_Hb,fill=gender))+
geom_bar(stat="identity",position="dodge")+
xlab("Gender") +ylab("HbA1c_diff")+
theme_bw()
## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.
t.test(HbA1c_nz.diff~gender,data=data3)
##
## Welch Two Sample t-test
##
## data: HbA1c_nz.diff by gender
## t = -0.55778, df = 5003.3, p-value = 0.577
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -0.02555737 0.01423556
## sample estimates:
## mean in group female mean in group male
## 0.03397754 0.03963844
#ヒストグラム作成
#step.diff.quintileごとの平均年齢
data3 %>%
group_by(step.diff.quintile) %>%
summarise(mean_age=mean(age,na.rm=T)) %>%
ggplot(mapping = aes(x=step.diff.quintile,y=mean_age))+
geom_bar(stat="identity")+
xlab("Step Difference Quintile") +
ylab("Mean Age")+
theme_bw()
#step.diff.quintileごとにおけるベースライン時と健診時の体重差
#step.diff.quintileごとにおけるベースライン時と健診時の体重差
data3 %>%
group_by(step.diff.quintile) %>%
summarise(mean_weight=mean(weight_nz.diff,na.rm=T),
sd_weight=sd(weight_nz.diff,na.rm=T),
se_weight=sd_weight / sqrt(n())) %>%
ggplot(mapping = aes(x=step.diff.quintile,y=mean_weight))+
geom_errorbar(aes(ymin=mean_weight-se_weight,ymax=mean_weight+se_weight),width=0.3)+
geom_bar(stat="identity")+
xlab("Step Difference Quintile") +
ylab("Changing Weight(Mean Weight)")+
theme_bw()
data3 %>%
group_by(step.diff.quintile, gender) %>%
summarise(mean_weight=mean(weight_nz.diff, na.rm=TRUE),
sd_weight=sd(weight_nz.diff, na.rm=TRUE),
se_weight=sd_weight / sqrt(n())) %>%
ggplot(mapping = aes(x=step.diff.quintile, y=mean_weight, fill=gender)) +
geom_errorbar(aes(ymin=mean_weight-se_weight, ymax=mean_weight+se_weight), width=0.3,
position=position_dodge(width=0.5)) +
geom_bar(stat="identity", position=position_dodge(width=0.5)) +
xlab("Step Difference Quintile") +
ylab("Changing Weight (Mean Weight)") +
theme_bw() +
scale_fill_manual(values = c("male" = "blue", "female" = "pink")) # ここで性別ごとの色を指定
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.
colnames(data3)
## [1] "仮個人id" "gender"
## [3] "age" "kencom登録年月"
## [5] "step.diff.quintile" "step.diff.median.by.group"
## [7] "weight_nz.diff" "systolicBP_nz.diff"
## [9] "diastolicBP_nz.diff" "ldl_c_nz.diff"
## [11] "hdl_c_nz.diff" "TG_nz.diff"
## [13] "HbA1c_nz.diff" "step_mean.diff"
## [15] "baseline健診受診年月" "followup健診受診年月"
## [17] "weight_nz.b" "systolicBP_nz.b"
## [19] "diastolicBP_nz.b" "ldl_c_nz.b"
## [21] "hdl_c_nz.b" "TG_nz.b"
## [23] "HbA1c_nz.b" "weight_nz.f"
## [25] "systolicBP_nz.f" "diastolicBP_nz.f"
## [27] "ldl_c_nz.f" "hdl_c_nz.f"
## [29] "TG_nz.f" "HbA1c_nz.f"
data3 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.quintile, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
geom_bar(stat = "identity",position = "dodge") +
geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
facet_wrap(~measure_type,scales = "free")+
theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.
data3 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.quintile, measure_type, gender) %>%
summarise(mean = mean(diff_value, na.rm = TRUE),
sd = sd(diff_value, na.rm = TRUE),
se = sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.quintile, y = mean, fill = gender)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
facet_wrap(~measure_type, scales = "free") +
scale_fill_manual(values = c("male" = "blue", "female" = "red")) +
theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile', 'measure_type'. You
## can override using the `.groups` argument.
健診検査値変化量 = Β x step.diff.median.by.group+切片(intercept)
data3 %>% head()
## 仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6 male 66 2016-04-01 1
## 2 1OwsKnMNaE male 39 2016-04-01 3
## 3 0IliCoWCZV male 31 2016-04-01 3
## 4 M0tEnZHNql female 51 2016-04-01 2
## 5 PDnTXele4g male 26 2016-04-01 5
## 6 AecvtKe8cE male 34 2016-05-01 2
## step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1 -963.51562 -4.1390000 1.204490
## 2 62.69344 -3.1120477 5.615931
## 3 62.69344 0.7284926 -2.445457
## 4 -327.36152 4.0614129 24.935567
## 5 1596.78921 -2.6122737 10.506455
## 6 -327.36152 -0.3897352 -4.238583
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1 2.6720062 5.3590352 0.4610505 -15.515152 -0.48755945
## 2 -3.6108605 -0.3891425 1.9976718 -2.879915 -0.21745953
## 3 -2.5427406 -22.6019218 4.9481587 -2.196534 -0.06833491
## 4 1.9345428 -15.3831264 -2.2091562 -19.505292 -0.16728154
## 5 8.2586546 -18.5112023 -18.5583183 104.989170 -0.08458658
## 6 -0.4893504 -10.5642779 -11.3340042 11.296307 -0.02670434
## step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1 -623.47945 2015-04-01 2017-04-01 66.89074
## 2 -12.98046 2015-06-01 2017-04-01 90.51351
## 3 143.71995 2015-05-01 2017-05-01 71.95757
## 4 -301.81007 2015-05-01 2017-05-01 67.16708
## 5 1174.52970 2015-05-01 2017-05-01 70.95344
## 6 -576.22646 2015-06-01 2017-05-01 65.04586
## systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b TG_nz.b HbA1c_nz.b
## 1 109.9507 71.62864 108.8772 44.50094 76.35546 5.631184
## 2 119.0751 87.45228 117.3376 45.69706 108.15197 6.440423
## 3 133.8459 78.75107 118.8808 58.35234 67.63990 5.337701
## 4 112.6972 80.13369 124.2131 54.28470 132.65872 5.760370
## 5 101.7182 56.37320 155.5491 63.88320 72.98456 5.292819
## 6 111.6170 68.20776 136.7508 71.41820 78.06746 5.156067
## weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f TG_nz.f
## 1 62.75174 111.1552 74.30065 114.23626 44.96199 60.84030
## 2 87.40146 124.6911 83.84142 116.94841 47.69473 105.27205
## 3 72.68606 131.4004 76.20833 96.27893 63.30050 65.44337
## 4 71.22849 137.6327 82.06824 108.82998 52.07554 113.15343
## 5 68.34116 112.2247 64.63186 137.03787 45.32488 177.97374
## 6 64.65613 107.3784 67.71841 126.18648 60.08419 89.36376
## HbA1c_nz.f
## 1 5.143625
## 2 6.222963
## 3 5.269366
## 4 5.593088
## 5 5.208233
## 6 5.129363
##step.diff.median.by.groupについて
unique(data3$step.diff.median.by.group)
## [1] -963.51562 62.69344 -327.36152 1596.78921 516.39142
n_distinct(data3$step.diff.median.by.group)
## [1] 5
summary(data3$step.diff.median.by.group)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -963.52 -327.36 62.69 177.03 516.39 1596.79
str(data3$step.diff.median.by.group)
## num [1:5473] -963.5 62.7 62.7 -327.4 1596.8 ...
data4<-data3 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.median.by.group, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n()))
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
data4 %>% head()
## # A tibble: 6 × 5
## # Groups: step.diff.median.by.group [1]
## step.diff.median.by.group measure_type mean sd se
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 -964. HbA1c_nz.diff 0.0488 0.426 0.0129
## 2 -964. TG_nz.diff 3.25 83.9 2.54
## 3 -964. diastolicBP_nz.diff 1.39 9.12 0.276
## 4 -964. hdl_c_nz.diff 0.196 9.42 0.285
## 5 -964. ldl_c_nz.diff 3.10 23.6 0.712
## 6 -964. systolicBP_nz.diff 1.78 12.9 0.389
#dBP
DBP<-
data4 %>%
filter(measure_type=="diastolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP<-
data4 %>%
filter(measure_type=="systolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="sBP_diff",.before=Estimate)
#weight
weight<-
data4 %>%
filter(measure_type=="weight_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c<-
data4 %>%
filter(measure_type=="HbA1c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG<-
data4 %>%
filter(measure_type=="TG_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="TG_diff",.before=Estimate)
#HDL
HDL<-
data4 %>%
filter(measure_type=="hdl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL<-
data4 %>%
filter(measure_type=="ldl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="LDL_diff",.before=Estimate)
data5<-bind_rows(DBP,SBP,weight,HbA1c,TG,HDL,LDL)
data5
## term Estimate p_value
## step.diff.median.by.group...1 dBP_diff -2.890070e-04 0.106801745
## step.diff.median.by.group...2 sBP_diff -4.237653e-04 0.022124846
## step.diff.median.by.group...3 weight_diff -3.551869e-04 0.003169958
## step.diff.median.by.group...4 HbA1c_diff -1.468281e-05 0.020568124
## step.diff.median.by.group...5 TG_diff -2.401945e-03 0.279411725
## step.diff.median.by.group...6 HDL_diff 7.970896e-04 0.007896832
## step.diff.median.by.group...7 LDL_diff -5.979974e-04 0.288273409
data5 %>%
mutate("p<0.05"=if_else(p_value<0.05,"*",""))
## term Estimate p_value p<0.05
## step.diff.median.by.group...1 dBP_diff -2.890070e-04 0.106801745
## step.diff.median.by.group...2 sBP_diff -4.237653e-04 0.022124846 *
## step.diff.median.by.group...3 weight_diff -3.551869e-04 0.003169958 *
## step.diff.median.by.group...4 HbA1c_diff -1.468281e-05 0.020568124 *
## step.diff.median.by.group...5 TG_diff -2.401945e-03 0.279411725
## step.diff.median.by.group...6 HDL_diff 7.970896e-04 0.007896832 *
## step.diff.median.by.group...7 LDL_diff -5.979974e-04 0.288273409
#step.diff.median.by.groupを5つに分類して各検査値の変化量を比較
data6 <- data3 %>%
mutate(
step.diff.median.group = round(step.diff.median.by.group, 3),
step.diff.category = case_when(
step.diff.median.group == round(-963.51562, 3) ~ 1,
step.diff.median.group == round(-327.36152, 3) ~ 2,
step.diff.median.group == round(62.69344, 3) ~ 3,
step.diff.median.group == round(516.39142, 3) ~ 4,
step.diff.median.group == round(1596.78921, 3) ~ 5,
TRUE ~ NA # どの条件にも当てはまらない場合、NAを割り当てる
)
)
data6 %>% head()
## 仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6 male 66 2016-04-01 1
## 2 1OwsKnMNaE male 39 2016-04-01 3
## 3 0IliCoWCZV male 31 2016-04-01 3
## 4 M0tEnZHNql female 51 2016-04-01 2
## 5 PDnTXele4g male 26 2016-04-01 5
## 6 AecvtKe8cE male 34 2016-05-01 2
## step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1 -963.51562 -4.1390000 1.204490
## 2 62.69344 -3.1120477 5.615931
## 3 62.69344 0.7284926 -2.445457
## 4 -327.36152 4.0614129 24.935567
## 5 1596.78921 -2.6122737 10.506455
## 6 -327.36152 -0.3897352 -4.238583
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1 2.6720062 5.3590352 0.4610505 -15.515152 -0.48755945
## 2 -3.6108605 -0.3891425 1.9976718 -2.879915 -0.21745953
## 3 -2.5427406 -22.6019218 4.9481587 -2.196534 -0.06833491
## 4 1.9345428 -15.3831264 -2.2091562 -19.505292 -0.16728154
## 5 8.2586546 -18.5112023 -18.5583183 104.989170 -0.08458658
## 6 -0.4893504 -10.5642779 -11.3340042 11.296307 -0.02670434
## step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1 -623.47945 2015-04-01 2017-04-01 66.89074
## 2 -12.98046 2015-06-01 2017-04-01 90.51351
## 3 143.71995 2015-05-01 2017-05-01 71.95757
## 4 -301.81007 2015-05-01 2017-05-01 67.16708
## 5 1174.52970 2015-05-01 2017-05-01 70.95344
## 6 -576.22646 2015-06-01 2017-05-01 65.04586
## systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b TG_nz.b HbA1c_nz.b
## 1 109.9507 71.62864 108.8772 44.50094 76.35546 5.631184
## 2 119.0751 87.45228 117.3376 45.69706 108.15197 6.440423
## 3 133.8459 78.75107 118.8808 58.35234 67.63990 5.337701
## 4 112.6972 80.13369 124.2131 54.28470 132.65872 5.760370
## 5 101.7182 56.37320 155.5491 63.88320 72.98456 5.292819
## 6 111.6170 68.20776 136.7508 71.41820 78.06746 5.156067
## weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f TG_nz.f
## 1 62.75174 111.1552 74.30065 114.23626 44.96199 60.84030
## 2 87.40146 124.6911 83.84142 116.94841 47.69473 105.27205
## 3 72.68606 131.4004 76.20833 96.27893 63.30050 65.44337
## 4 71.22849 137.6327 82.06824 108.82998 52.07554 113.15343
## 5 68.34116 112.2247 64.63186 137.03787 45.32488 177.97374
## 6 64.65613 107.3784 67.71841 126.18648 60.08419 89.36376
## HbA1c_nz.f step.diff.median.group step.diff.category
## 1 5.143625 -963.516 1
## 2 6.222963 62.693 3
## 3 5.269366 62.693 3
## 4 5.593088 -327.362 2
## 5 5.208233 1596.789 5
## 6 5.129363 -327.362 2
data6 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.category, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.category, y =mean)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
facet_wrap(~measure_type,scales = "free")+
theme_classic()
## `summarise()` has grouped output by 'step.diff.category'. You can override
## using the `.groups` argument.
data6 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.category, measure_type, gender) %>% # 追加: gender
summarise(mean = mean(diff_value, na.rm = TRUE),
sd = sd(diff_value, na.rm = TRUE),
se = sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.category, y = mean, fill = gender)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
facet_wrap(~measure_type, scales = "free") +
scale_fill_manual(values = c("male" = "blue", "female" = "red")) +
theme_classic()
## `summarise()` has grouped output by 'step.diff.category', 'measure_type'. You
## can override using the `.groups` argument.
#write_csv(data6, "/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/data6.csv")
data3 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.quintile, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
geom_bar(stat = "identity",position = "dodge") +
geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
facet_wrap(~measure_type,scales = "free")+
theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.
data3 %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.quintile, measure_type, gender) %>%
summarise(mean = mean(diff_value, na.rm = TRUE),
sd = sd(diff_value, na.rm = TRUE),
se = sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.quintile, y = mean, fill = gender)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
facet_wrap(~measure_type, scales = "free") +
scale_fill_manual(values = c("male" = "blue", "female" = "red")) +
theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile', 'measure_type'. You
## can override using the `.groups` argument.
data_male<-data3 %>% filter(gender=="male")
df_male1<-data_male %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.median.by.group, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n()))
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
#dBP
DBP_male<-
df_male1 %>%
filter(measure_type=="diastolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_male<-
df_male1 %>%
filter(measure_type=="systolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="sBP_diff",.before=Estimate)
#weight
weight_male<-
df_male1 %>%
filter(measure_type=="weight_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_male<-
df_male1 %>%
filter(measure_type=="HbA1c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_male<-
df_male1 %>%
filter(measure_type=="TG_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_male<-
df_male1 %>%
filter(measure_type=="hdl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_male<-
df_male1 %>%
filter(measure_type=="ldl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="LDL_diff",.before=Estimate)
df5_male<-bind_rows(DBP_male,SBP_male,weight_male,HbA1c_male,TG_male,HDL_male,LDL_male)
df5_male
## term Estimate p_value
## step.diff.median.by.group...1 dBP_diff -7.193161e-04 0.034426241
## step.diff.median.by.group...2 sBP_diff -6.272823e-04 0.116603247
## step.diff.median.by.group...3 weight_diff -3.520797e-04 0.019218203
## step.diff.median.by.group...4 HbA1c_diff -1.805442e-05 0.114183801
## step.diff.median.by.group...5 TG_diff -2.843390e-03 0.192499844
## step.diff.median.by.group...6 HDL_diff 8.258661e-04 0.008869557
## step.diff.median.by.group...7 LDL_diff -2.149112e-04 0.719694014
df5_male %>%
mutate("p<0.05"=if_else(p_value<0.05,"*",""))
## term Estimate p_value p<0.05
## step.diff.median.by.group...1 dBP_diff -7.193161e-04 0.034426241 *
## step.diff.median.by.group...2 sBP_diff -6.272823e-04 0.116603247
## step.diff.median.by.group...3 weight_diff -3.520797e-04 0.019218203 *
## step.diff.median.by.group...4 HbA1c_diff -1.805442e-05 0.114183801
## step.diff.median.by.group...5 TG_diff -2.843390e-03 0.192499844
## step.diff.median.by.group...6 HDL_diff 8.258661e-04 0.008869557 *
## step.diff.median.by.group...7 LDL_diff -2.149112e-04 0.719694014
data_female<-data3 %>% filter(gender=="female")
df_female1<-data_female %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.median.by.group, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n()))
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
#dBP
DBP_female<-
df_female1 %>%
filter(measure_type=="diastolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_female<-
df_female1 %>%
filter(measure_type=="systolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="sBP_diff",.before=Estimate)
#weight
weight_female<-
df_female1 %>%
filter(measure_type=="weight_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_female<-
df_female1 %>%
filter(measure_type=="HbA1c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_female<-
df_female1 %>%
filter(measure_type=="TG_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_female<-
df_female1 %>%
filter(measure_type=="hdl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_female<-
df_female1 %>%
filter(measure_type=="ldl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="LDL_diff",.before=Estimate)
df5_female<-bind_rows(DBP_female,SBP_female,weight_female,HbA1c_female,TG_female,HDL_female,LDL_female)
df5_female
## term Estimate p_value
## step.diff.median.by.group...1 dBP_diff 3.640916e-04 0.03953484
## step.diff.median.by.group...2 sBP_diff -1.192206e-04 0.77770724
## step.diff.median.by.group...3 weight_diff -3.569323e-04 0.01954224
## step.diff.median.by.group...4 HbA1c_diff -9.891568e-06 0.18287912
## step.diff.median.by.group...5 TG_diff -1.700495e-03 0.52461853
## step.diff.median.by.group...6 HDL_diff 7.601839e-04 0.01828471
## step.diff.median.by.group...7 LDL_diff -1.163230e-03 0.09320509
df5_female %>%
mutate("p<0.05"=if_else(p_value<0.05,"*",""))
## term Estimate p_value p<0.05
## step.diff.median.by.group...1 dBP_diff 3.640916e-04 0.03953484 *
## step.diff.median.by.group...2 sBP_diff -1.192206e-04 0.77770724
## step.diff.median.by.group...3 weight_diff -3.569323e-04 0.01954224 *
## step.diff.median.by.group...4 HbA1c_diff -9.891568e-06 0.18287912
## step.diff.median.by.group...5 TG_diff -1.700495e-03 0.52461853
## step.diff.median.by.group...6 HDL_diff 7.601839e-04 0.01828471 *
## step.diff.median.by.group...7 LDL_diff -1.163230e-03 0.09320509
HT=sBP130以上orDL=LDL:140以上orHDL:40未満orTG:175以上orDM=6.5
data3_highrisk<-data3 %>%
filter(systolicBP_nz.b>=130 | ldl_c_nz.b>=140 | hdl_c_nz.b<40 | TG_nz.b>=175 | HbA1c_nz.b>=6.5)
data3_highrisk %>% nrow()
## [1] 2449
data3_highrisk %>% skim()
| Name | Piped data |
| Number of rows | 2449 |
| Number of columns | 30 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| numeric | 25 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| 仮個人id | 0 | 1 | 10 | 10 | 0 | 2449 | 0 |
| gender | 0 | 1 | 4 | 6 | 0 | 2 | 0 |
| kencom登録年月 | 0 | 1 | 10 | 10 | 0 | 24 | 0 |
| baseline健診受診年月 | 0 | 1 | 10 | 10 | 0 | 30 | 0 |
| followup健診受診年月 | 0 | 1 | 10 | 10 | 0 | 23 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 44.97 | 11.06 | 18.00 | 37.00 | 46.00 | 53.00 | 71.00 | ▂▆▇▇▂ |
| step.diff.quintile | 0 | 1 | 3.01 | 1.42 | 1.00 | 2.00 | 3.00 | 4.00 | 5.00 | ▇▇▇▇▇ |
| step.diff.median.by.group | 0 | 1 | 182.03 | 862.23 | -963.52 | -327.36 | 62.69 | 516.39 | 1596.79 | ▅▅▇▁▅ |
| weight_nz.diff | 0 | 1 | 0.08 | 4.39 | -31.13 | -2.38 | 0.20 | 2.76 | 19.59 | ▁▁▅▇▁ |
| systolicBP_nz.diff | 0 | 1 | -1.36 | 14.36 | -59.82 | -10.51 | -1.26 | 8.19 | 52.65 | ▁▂▇▃▁ |
| diastolicBP_nz.diff | 0 | 1 | 0.28 | 10.04 | -37.00 | -6.16 | 0.51 | 6.96 | 45.97 | ▁▅▇▂▁ |
| ldl_c_nz.diff | 0 | 1 | -2.53 | 26.60 | -143.84 | -16.50 | -1.30 | 13.35 | 98.39 | ▁▁▇▇▁ |
| hdl_c_nz.diff | 0 | 1 | 1.18 | 8.91 | -36.25 | -4.26 | 0.93 | 6.33 | 52.75 | ▁▆▇▁▁ |
| TG_nz.diff | 0 | 1 | -7.55 | 99.27 | -1279.42 | -30.67 | -3.42 | 22.42 | 1146.49 | ▁▁▇▁▁ |
| HbA1c_nz.diff | 0 | 1 | 0.06 | 0.45 | -4.51 | -0.16 | 0.06 | 0.27 | 5.93 | ▁▁▇▁▁ |
| step_mean.diff | 0 | 1 | 248.10 | 1328.27 | -4286.43 | -442.90 | 68.12 | 687.88 | 10651.17 | ▁▇▁▁▁ |
| weight_nz.b | 0 | 1 | 68.45 | 12.38 | 37.62 | 60.23 | 68.06 | 75.84 | 125.84 | ▂▇▅▁▁ |
| systolicBP_nz.b | 0 | 1 | 126.01 | 16.05 | 81.63 | 114.44 | 127.11 | 136.04 | 209.34 | ▂▇▅▁▁ |
| diastolicBP_nz.b | 0 | 1 | 78.32 | 11.65 | 44.97 | 70.29 | 78.13 | 86.02 | 126.16 | ▁▇▇▂▁ |
| ldl_c_nz.b | 0 | 1 | 138.83 | 31.24 | 30.71 | 116.48 | 142.68 | 158.84 | 276.22 | ▁▅▇▁▁ |
| hdl_c_nz.b | 0 | 1 | 59.09 | 16.18 | 24.59 | 47.49 | 56.80 | 68.42 | 140.46 | ▃▇▂▁▁ |
| TG_nz.b | 0 | 1 | 139.89 | 108.49 | 23.75 | 76.56 | 110.50 | 169.93 | 1450.27 | ▇▁▁▁▁ |
| HbA1c_nz.b | 0 | 1 | 5.54 | 0.59 | 4.11 | 5.21 | 5.45 | 5.72 | 12.17 | ▇▂▁▁▁ |
| weight_nz.f | 0 | 1 | 68.53 | 12.35 | 38.57 | 60.55 | 67.86 | 75.87 | 124.70 | ▂▇▅▁▁ |
| systolicBP_nz.f | 0 | 1 | 124.66 | 15.29 | 82.37 | 114.66 | 123.93 | 133.82 | 204.01 | ▂▇▅▁▁ |
| diastolicBP_nz.f | 0 | 1 | 78.60 | 11.18 | 41.49 | 71.04 | 78.39 | 86.03 | 119.35 | ▁▅▇▃▁ |
| ldl_c_nz.f | 0 | 1 | 136.30 | 32.06 | 44.80 | 114.14 | 135.71 | 156.69 | 279.04 | ▂▇▇▁▁ |
| hdl_c_nz.f | 0 | 1 | 60.27 | 16.66 | 25.77 | 48.25 | 57.54 | 69.14 | 150.95 | ▅▇▂▁▁ |
| TG_nz.f | 0 | 1 | 132.34 | 104.76 | 20.33 | 73.02 | 108.35 | 158.94 | 1544.59 | ▇▁▁▁▁ |
| HbA1c_nz.f | 0 | 1 | 5.60 | 0.60 | 4.32 | 5.26 | 5.51 | 5.77 | 12.56 | ▇▁▁▁▁ |
data3_highrisk %>% head()
## 仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0IliCoWCZV male 31 2016-04-01 3
## 2 PDnTXele4g male 26 2016-04-01 5
## 3 EPbHvE0cSI female 22 2016-05-01 2
## 4 qBqHi7ri2W male 42 2016-04-01 2
## 5 TRZYorNdbw female 32 2016-05-01 3
## 6 ULSKJQhwhT male 25 2016-06-01 1
## step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1 62.69344 0.7284926 -2.445457
## 2 1596.78921 -2.6122737 10.506455
## 3 -327.36152 -4.4093780 2.458827
## 4 -327.36152 -2.2889677 8.490237
## 5 62.69344 8.3624345 -16.204654
## 6 -963.51562 3.7261152 28.878772
## diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1 -2.542741 -22.601922 4.948159 -2.196534 -0.06833491
## 2 8.258655 -18.511202 -18.558318 104.989170 -0.08458658
## 3 -4.471324 -41.671288 -11.151447 25.578212 0.36740637
## 4 10.265643 4.338429 -8.284412 20.048275 0.36238565
## 5 -5.522645 -20.854080 -12.814424 74.910630 -0.28642599
## 6 3.430650 7.664912 6.415059 -2.612318 -0.06264475
## step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1 143.7200 2015-05-01 2017-05-01 71.95757
## 2 1174.5297 2015-05-01 2017-05-01 70.95344
## 3 -466.6492 2015-05-01 2017-06-01 70.90351
## 4 -490.7139 2015-05-01 2017-06-01 70.25011
## 5 129.7318 2015-06-01 2017-06-01 71.35653
## 6 -635.0747 2015-06-01 2017-06-01 57.70602
## systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b TG_nz.b HbA1c_nz.b
## 1 133.84588 78.75107 118.8808 58.35234 67.63990 5.337701
## 2 101.71822 56.37320 155.5491 63.88320 72.98456 5.292819
## 3 115.42029 73.12555 165.4906 48.76714 111.67693 5.506758
## 4 93.26939 55.46555 152.4000 51.88232 99.73815 4.618830
## 5 112.29083 57.39880 142.6868 58.44698 77.56937 5.520969
## 6 139.42054 76.05343 127.1050 66.96992 59.32766 5.753736
## weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f TG_nz.f
## 1 72.68606 131.40042 76.20833 96.27893 63.30050 65.44337
## 2 68.34116 112.22468 64.63186 137.03787 45.32488 177.97374
## 3 66.49414 117.87911 68.65423 123.81931 37.61569 137.25515
## 4 67.96114 101.75963 65.73120 156.73848 43.59791 119.78642
## 5 79.71897 96.08617 51.87616 121.83272 45.63256 152.48000
## 6 61.43213 168.29932 79.48408 134.76990 73.38498 56.71534
## HbA1c_nz.f
## 1 5.269366
## 2 5.208233
## 3 5.874164
## 4 4.981216
## 5 5.234543
## 6 5.691091
ggplot(data3_highrisk,mapping = aes(x=age,fill=gender))+
geom_histogram(binwidth=5, position="identity",alpha=0.5) +
facet_wrap(~gender,scales = "free")+
theme_bw()
data3_highrisk %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.quintile, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
geom_bar(stat = "identity",position = "dodge") +
geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
facet_wrap(~measure_type,scales = "free")+
theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.
data4_highrisk<-data3_highrisk %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.median.by.group, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n()))
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
data4_highrisk %>% head()
## # A tibble: 6 × 5
## # Groups: step.diff.median.by.group [1]
## step.diff.median.by.group measure_type mean sd se
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 -964. HbA1c_nz.diff 0.0698 0.538 0.0244
## 2 -964. TG_nz.diff -6.20 110. 5.01
## 3 -964. diastolicBP_nz.diff 0.969 9.61 0.436
## 4 -964. hdl_c_nz.diff 0.266 8.90 0.404
## 5 -964. ldl_c_nz.diff -0.860 27.0 1.23
## 6 -964. systolicBP_nz.diff 0.146 14.3 0.650
#dBP
DBP_highrisk<-
data4_highrisk %>%
filter(measure_type=="diastolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_highrisk<-
data4_highrisk %>%
filter(measure_type=="systolicBP_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="sBP_diff",.before=Estimate)
#weight
weight_highrisk<-
data4_highrisk %>%
filter(measure_type=="weight_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_highrisk<-
data4_highrisk %>%
filter(measure_type=="HbA1c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_highrisk<-
data4_highrisk %>%
filter(measure_type=="TG_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_highrisk<-
data4_highrisk %>%
filter(measure_type=="hdl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_highrisk<-
data4_highrisk %>%
filter(measure_type=="ldl_c_nz.diff") %>%
lm(mean~step.diff.median.by.group,data=.) %>%
summary() %>%
.$coefficients %>%
as.data.frame() %>%
filter(row.names(.)=="step.diff.median.by.group") %>%
dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
mutate(term="LDL_diff",.before=Estimate)
data5_highrisk<-bind_rows(DBP_highrisk,SBP_highrisk,weight_highrisk,HbA1c_highrisk,TG_highrisk,HDL_highrisk,LDL_highrisk)
data5_highrisk
## term Estimate p_value
## step.diff.median.by.group...1 dBP_diff -6.197701e-04 0.019402126
## step.diff.median.by.group...2 sBP_diff -9.449150e-04 0.018892579
## step.diff.median.by.group...3 weight_diff -5.140357e-04 0.000219201
## step.diff.median.by.group...4 HbA1c_diff -1.941723e-05 0.015689553
## step.diff.median.by.group...5 TG_diff -1.804097e-03 0.589938244
## step.diff.median.by.group...6 HDL_diff 9.991996e-04 0.013094852
## step.diff.median.by.group...7 LDL_diff -5.655664e-04 0.444863251
data5_highrisk %>%
mutate("p<0.05"=if_else(p_value<0.05,"*",""))
## term Estimate p_value p<0.05
## step.diff.median.by.group...1 dBP_diff -6.197701e-04 0.019402126 *
## step.diff.median.by.group...2 sBP_diff -9.449150e-04 0.018892579 *
## step.diff.median.by.group...3 weight_diff -5.140357e-04 0.000219201 *
## step.diff.median.by.group...4 HbA1c_diff -1.941723e-05 0.015689553 *
## step.diff.median.by.group...5 TG_diff -1.804097e-03 0.589938244
## step.diff.median.by.group...6 HDL_diff 9.991996e-04 0.013094852 *
## step.diff.median.by.group...7 LDL_diff -5.655664e-04 0.444863251
HT=sBP130以上、DL=LDL:140以上orHDL:40未満orTG:175以上、DM=6.5
data3_high<-data3 %>%
filter(systolicBP_nz.b>=130) %>%
filter(ldl_c_nz.b>=140 | hdl_c_nz.b<40 | TG_nz.b>=175) %>%
filter(HbA1c_nz.b>=6.5)
data3_high %>% nrow()
## [1] 31
data3_high %>% skim()
| Name | Piped data |
| Number of rows | 31 |
| Number of columns | 30 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| numeric | 25 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| 仮個人id | 0 | 1 | 10 | 10 | 0 | 31 | 0 |
| gender | 0 | 1 | 4 | 6 | 0 | 2 | 0 |
| kencom登録年月 | 0 | 1 | 10 | 10 | 0 | 9 | 0 |
| baseline健診受診年月 | 0 | 1 | 10 | 10 | 0 | 12 | 0 |
| followup健診受診年月 | 0 | 1 | 10 | 10 | 0 | 8 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 45.00 | 12.80 | 23.00 | 35.50 | 43.00 | 53.00 | 71.00 | ▅▇▆▅▃ |
| step.diff.quintile | 0 | 1 | 3.00 | 1.71 | 1.00 | 1.00 | 3.00 | 5.00 | 5.00 | ▇▃▂▃▇ |
| step.diff.median.by.group | 0 | 1 | 234.74 | 1073.62 | -963.52 | -963.52 | 62.69 | 1596.79 | 1596.79 | ▇▃▆▁▇ |
| weight_nz.diff | 0 | 1 | -2.81 | 4.15 | -13.73 | -4.92 | -2.81 | -0.24 | 6.22 | ▁▅▇▇▂ |
| systolicBP_nz.diff | 0 | 1 | -8.50 | 18.52 | -51.36 | -15.55 | -9.64 | 5.73 | 25.19 | ▂▂▇▅▂ |
| diastolicBP_nz.diff | 0 | 1 | -6.37 | 10.96 | -24.31 | -14.70 | -4.82 | 0.37 | 14.28 | ▅▆▇▃▅ |
| ldl_c_nz.diff | 0 | 1 | -9.39 | 51.50 | -143.84 | -25.34 | -4.79 | 18.95 | 88.81 | ▁▂▇▇▂ |
| hdl_c_nz.diff | 0 | 1 | 2.26 | 9.68 | -20.54 | -2.14 | 2.78 | 7.66 | 24.60 | ▂▂▇▃▁ |
| TG_nz.diff | 0 | 1 | -32.13 | 269.77 | -504.79 | -114.13 | -28.89 | 9.30 | 1146.49 | ▂▇▁▁▁ |
| HbA1c_nz.diff | 0 | 1 | -0.06 | 2.04 | -4.51 | -1.02 | 0.08 | 0.44 | 5.93 | ▂▃▇▁▁ |
| step_mean.diff | 0 | 1 | 164.75 | 1371.49 | -2638.22 | -637.24 | 87.20 | 1128.13 | 3017.17 | ▂▇▅▆▂ |
| weight_nz.b | 0 | 1 | 82.01 | 15.90 | 59.30 | 70.81 | 80.98 | 88.85 | 120.46 | ▇▅▆▂▂ |
| systolicBP_nz.b | 0 | 1 | 144.86 | 15.84 | 130.98 | 134.71 | 140.20 | 146.54 | 192.79 | ▇▃▁▁▁ |
| diastolicBP_nz.b | 0 | 1 | 89.44 | 9.31 | 68.94 | 85.10 | 89.16 | 91.76 | 111.44 | ▂▂▇▁▂ |
| ldl_c_nz.b | 0 | 1 | 137.93 | 33.37 | 67.33 | 109.47 | 141.31 | 162.84 | 201.55 | ▂▅▇▅▃ |
| hdl_c_nz.b | 0 | 1 | 51.16 | 14.73 | 32.33 | 41.78 | 45.88 | 57.57 | 89.24 | ▇▇▃▂▂ |
| TG_nz.b | 0 | 1 | 240.11 | 157.52 | 67.15 | 126.42 | 183.14 | 292.91 | 611.58 | ▇▅▁▁▂ |
| HbA1c_nz.b | 0 | 1 | 7.62 | 1.42 | 6.53 | 6.72 | 6.97 | 7.80 | 12.17 | ▇▂▁▁▁ |
| weight_nz.f | 0 | 1 | 79.20 | 14.76 | 59.26 | 69.45 | 78.01 | 87.70 | 114.79 | ▇▇▇▁▂ |
| systolicBP_nz.f | 0 | 1 | 136.36 | 14.70 | 106.65 | 126.08 | 136.07 | 146.41 | 167.78 | ▂▇▇▆▃ |
| diastolicBP_nz.f | 0 | 1 | 83.06 | 9.38 | 61.77 | 77.04 | 82.42 | 89.67 | 100.17 | ▂▃▇▆▅ |
| ldl_c_nz.f | 0 | 1 | 128.55 | 36.40 | 53.48 | 102.15 | 131.36 | 156.85 | 204.44 | ▂▇▇▇▂ |
| hdl_c_nz.f | 0 | 1 | 53.41 | 13.46 | 25.77 | 45.65 | 51.13 | 60.04 | 92.02 | ▂▇▅▂▁ |
| TG_nz.f | 0 | 1 | 207.98 | 253.93 | 70.63 | 109.11 | 136.71 | 199.15 | 1483.99 | ▇▁▁▁▁ |
| HbA1c_nz.f | 0 | 1 | 7.55 | 1.45 | 5.61 | 6.60 | 7.10 | 8.03 | 12.56 | ▇▇▁▁▁ |
ggplot(data3_high,mapping = aes(x=age,fill=gender))+
geom_histogram(binwidth=5, position="identity",alpha=0.5) +
facet_wrap(~gender,scales = "free")+
theme_bw()
data3_high %>%
pivot_longer(
cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
names_to = "measure_type",
values_to = "diff_value"
) %>%
group_by(step.diff.quintile, measure_type) %>%
summarise(mean=mean(diff_value,na.rm=T),
sd=sd(diff_value,na.rm=T),
se=sd / sqrt(n())) %>%
ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
geom_bar(stat = "identity",position = "dodge") +
geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
facet_wrap(~measure_type,scales = "free")+
theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.