R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

pacman::p_load(tidyverse,readxl,ggplot2,dplyr,tableone,ggpubr,ggrepel,DBI,skimr,dbplyr,pryr,lubridate, survival, survminer,car,svglite,MatchIt, cobalt)

#データ読み込み

data1<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data1_step.destructed.csv")
data2<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data2_monthly_step.destructed.csv")
data3<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data3_exam.destructed.csv")

分析1-1

data1 %>% skimr::skim()
Data summary
Name Piped data
Number of rows 12602
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 15
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
仮個人id 0 1 10 10 0 12602 0
性別_sfl 0 1 2 2 0 2 0
kencom登録年月 0 1 10 10 0 27 0
baseline健診受診年月 0 1 10 10 0 38 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
年齢_sfl 0 1.00 44.72 11.04 18.00 37.00 46.00 53.00 73.00 ▂▅▇▆▁
step_mean.diff 0 1.00 509.85 1594.22 -6205.96 -307.56 200.83 979.56 24369.72 ▅▇▁▁▁
step_mean.b 0 1.00 5641.75 2685.98 25.30 3386.36 5544.94 7509.58 20954.08 ▆▇▂▁▁
step_mean.f 0 1.00 6151.60 2723.39 12.77 4085.90 6017.96 7872.06 30572.20 ▇▇▁▁▁
step_sd.b 0 1.00 3089.95 1172.33 46.16 2245.54 2999.51 3770.87 7677.58 ▂▇▆▂▁
step_sd.f 0 1.00 3420.28 1412.43 47.54 2547.88 3219.49 4026.36 36661.56 ▇▁▁▁▁
step_mean.diff.wd 0 1.00 533.68 1684.35 -7282.11 -305.09 220.83 1038.35 23533.30 ▁▇▁▁▁
step_mean.diff.we 0 1.00 462.63 1748.29 -9706.07 -439.25 198.55 1016.13 26459.46 ▁▇▁▁▁
weight_nz.b 4 1.00 62.87 12.65 31.87 53.18 62.05 70.81 139.68 ▃▇▂▁▁
systolicBP_nz.b 5 1.00 118.19 15.92 75.16 106.95 117.17 127.86 209.34 ▂▇▃▁▁
diastolicBP_nz.b 5 1.00 73.24 11.82 38.80 64.90 72.40 80.78 128.57 ▁▇▆▁▁
ldl_c_nz.b 35 1.00 121.50 31.45 28.53 99.48 119.03 141.13 283.21 ▁▇▅▁▁
hdl_c_nz.b 33 1.00 64.82 16.41 24.59 52.76 63.10 74.91 152.03 ▂▇▃▁▁
TG_nz.b 33 1.00 100.59 78.44 20.16 56.83 80.02 118.76 1588.09 ▇▁▁▁▁
HbA1c_nz.b 330 0.97 5.44 0.54 4.02 5.15 5.37 5.62 13.12 ▇▁▁▁▁

集団の平均値を算出

# 平均と標準偏差を計算
mean_data <- mean(data1$step_mean.diff)
sd_data <- sd(data1$step_mean.diff)
se_data <- sd_data / sqrt(length(data1$step_mean.diff)) 
mean_data
## [1] 509.8529
sd_data
## [1] 1594.224
se_data
## [1] 14.20135

ヒストグラムを作成

#年齢
ggplot(data1,mapping = aes(x=年齢_sfl))+
  geom_histogram(bins = 50)+
  theme_bw()

#体重
ggplot(data1,mapping = aes(x=weight_nz.b))+
  geom_histogram(bins = 100)+
  theme_bw()
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).

#歩数変化量
ggplot(data1,mapping = aes(x=step_mean.diff))+
  geom_histogram(bins = 100)+
  #xlim(-5000,5000)+
  theme_bw()

# Kolmogorov Smirnovテストの実行
#ks.test(data1, "pnorm", mean = mean_data, sd = sd_data)

正規分布ではないのでウィルコクソン符号順位検定の実行

#wilcox.test(data1$step_mean.diff, mu = 0, paired = FALSE)

t検定実施。結果有意差あり→有意に増加している

data1$step_mean.diff %>% t.test()
## 
##  One Sample t-test
## 
## data:  .
## t = 35.902, df = 12601, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  482.0161 537.6897
## sample estimates:
## mean of x 
##  509.8529

分析1-2

data2 %>% skimr::skim()
Data summary
Name Piped data
Number of rows 302448
Number of columns 10
_______________________
Column type frequency:
character 4
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
仮個人id 0 1 10 10 0 12602 0
性別_sfl 0 1 2 2 0 2 0
kencom登録年月 0 1 10 10 0 27 0
年月 0 1 10 10 0 50 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
年齢_sfl 0 1 44.72 11.04 18 37.00 46.00 53.00 73.00 ▂▅▇▆▁
登録からの月数 0 1 -0.50 6.92 -12 -6.25 -0.50 5.25 11.00 ▇▇▆▇▇
step_mean 0 1 5893.71 2976.58 2 3573.42 5695.32 7752.38 84714.43 ▇▁▁▁▁
step_sd 0 1 2955.37 1477.54 0 1978.24 2771.46 3671.65 47763.00 ▇▁▁▁▁
step_mean.wd 51 1 6210.87 3225.61 2 3631.09 6026.95 8276.27 84697.91 ▇▁▁▁▁
step_mean.we 275 1 5085.80 3278.90 2 2735.50 4435.88 6703.11 93572.88 ▇▁▁▁▁

#ヒストグラム作成

ggplot(data2,mapping = aes(x=step_mean))+
  geom_histogram(bins = 100)+
  theme_bw()

## 登録月と歩数の関係

se_data2 <- sd(data2$step_mean) / sqrt(length(data2$step_mean))

data2 %>% group_by(登録からの月数) %>% 
  summarise(mean_step=mean(step_mean),
            sd_step=sd(step_mean),
            se_step=sd_step / sqrt(n())) %>%
  ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
  geom_vline(xintercept = 0,linetype="dotted")+
  xlab("registration_month")+ylab("mean_step")+
  theme_bw()

#週末と平日における歩数の違い(wd=平日、we=週末)

#平日
data2 %>% filter(!is.na(step_mean.wd)) %>% 
  group_by(登録からの月数) %>% 
  summarise(mean_step=mean(step_mean.wd),
            sd_step=sd(step_mean.wd),
            se_step=sd_step / sqrt(n())) %>%
  ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
  geom_vline(xintercept = 0,linetype="dotted")+
  xlab("registration_month")+ylab("mean_step")+
  theme_bw()

#週末
data2 %>% filter(!is.na(step_mean.we)) %>% 
  group_by(登録からの月数) %>% 
  summarise(mean_step=mean(step_mean.we),
            sd_step=sd(step_mean.we),
            se_step=sd_step / sqrt(n())) %>%
  ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
  geom_vline(xintercept = 0,linetype="dotted")+
  xlab("registration_month")+ylab("mean_step")+
  theme_bw()

#平日と週末の歩数について

data2_we <- data2 %>%
  pivot_longer(
    cols = c(step_mean.wd, step_mean.we),
    names_to = "day_type",
    values_to = "mean_step"
  ) %>%
  mutate(day_type = if_else(day_type == "step_mean.we", "Weekend", "Weekday")) %>%
 distinct(仮個人id,登録からの月数,day_type,.keep_all = T) %>%
  group_by(登録からの月数, day_type) %>%
  summarise(
    mean_step1 = mean(mean_step, na.rm = TRUE),
    sd_step = sd(mean_step, na.rm = TRUE),
    se_step = sd_step / sqrt(n()))
## `summarise()` has grouped output by '登録からの月数'. You can override using
## the `.groups` argument.
data2_we
## # A tibble: 48 × 5
## # Groups:   登録からの月数 [24]
##    登録からの月数 day_type mean_step1 sd_step se_step
##             <int> <chr>         <dbl>   <dbl>   <dbl>
##  1            -12 Weekday       5992.   3191.    28.4
##  2            -12 Weekend       4953.   3090.    27.5
##  3            -11 Weekday       5930.   3065.    27.3
##  4            -11 Weekend       4848.   3062.    27.3
##  5            -10 Weekday       5938.   3106.    27.7
##  6            -10 Weekend       4822.   3013.    26.8
##  7             -9 Weekday       5925.   3118.    27.8
##  8             -9 Weekend       4823.   3010.    26.8
##  9             -8 Weekday       5901.   3102.    27.6
## 10             -8 Weekend       4823.   3081.    27.4
## # ℹ 38 more rows

平日と週末における歩数の違い

  ggplot(data2_we, aes(x = 登録からの月数, y = mean_step1, color = day_type)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = mean_step1 - se_step, ymax = mean_step1 + se_step)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  xlab("registration_month") + ylab("mean_step") +
  theme_bw() +
  scale_color_manual(values = c("Weekend" = "blue", "Weekday" = "red")) 

分析2-1

data3 %>% head()
##     仮個人id 性別_sfl 年齢_sfl kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6     男性       66     2016-04-01                  1
## 2 1OwsKnMNaE     男性       39     2016-04-01                  3
## 3 0IliCoWCZV     男性       31     2016-04-01                  3
## 4 M0tEnZHNql     女性       51     2016-04-01                  2
## 5 PDnTXele4g     男性       26     2016-04-01                  5
## 6 AecvtKe8cE     男性       34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -963.51562     -4.1390000           1.204490
## 2                  62.69344     -3.1120477           5.615931
## 3                  62.69344      0.7284926          -2.445457
## 4                -327.36152      4.0614129          24.935567
## 5                1596.78921     -2.6122737          10.506455
## 6                -327.36152     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363
data3 <- data3 %>% 
  rename(gender = 性別_sfl, age = 年齢_sfl) %>%
  mutate(gender = if_else(gender == "男性", "male", "female"))
data3 %>% skimr::skim()
Data summary
Name Piped data
Number of rows 5473
Number of columns 30
_______________________
Column type frequency:
character 5
numeric 25
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
仮個人id 0 1 10 10 0 5473 0
gender 0 1 4 6 0 2 0
kencom登録年月 0 1 10 10 0 24 0
baseline健診受診年月 0 1 10 10 0 32 0
followup健診受診年月 0 1 10 10 0 24 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 44.86 11.10 18.00 37.00 46.00 53.00 73.00 ▂▅▇▆▁
step.diff.quintile 0 1 3.00 1.41 1.00 2.00 3.00 4.00 5.00 ▇▇▇▇▇
step.diff.median.by.group 0 1 177.03 860.34 -963.52 -327.36 62.69 516.39 1596.79 ▅▃▇▁▅
weight_nz.diff 0 1 0.36 4.11 -31.13 -2.06 0.44 2.92 19.59 ▁▁▅▇▁
systolicBP_nz.diff 0 1 1.09 13.39 -59.82 -7.55 1.11 9.60 52.65 ▁▂▇▃▁
diastolicBP_nz.diff 0 1 1.01 9.59 -37.00 -5.21 0.96 7.19 45.97 ▁▃▇▂▁
ldl_c_nz.diff 0 1 1.44 22.55 -143.84 -10.60 1.72 14.37 108.90 ▁▁▇▅▁
hdl_c_nz.diff 0 1 1.13 9.27 -43.70 -4.57 0.84 6.42 52.75 ▁▂▇▁▁
TG_nz.diff 0 1 -0.25 73.23 -1279.42 -19.13 0.07 19.70 1146.49 ▁▁▇▁▁
HbA1c_nz.diff 0 1 0.04 0.37 -4.51 -0.18 0.04 0.25 5.93 ▁▁▇▁▁
step_mean.diff 0 1 220.59 1308.52 -6205.96 -437.21 62.69 668.41 12842.29 ▁▇▁▁▁
weight_nz.b 0 1 64.09 12.38 32.24 54.99 63.60 71.81 139.68 ▃▇▂▁▁
systolicBP_nz.b 0 1 117.95 15.14 75.98 107.29 117.14 127.39 209.34 ▂▇▂▁▁
diastolicBP_nz.b 0 1 73.31 11.45 41.28 65.08 72.42 80.89 126.16 ▁▇▆▁▁
ldl_c_nz.b 0 1 120.87 30.34 28.98 99.45 118.56 140.01 276.22 ▁▇▅▁▁
hdl_c_nz.b 0 1 63.33 16.13 24.59 51.51 61.31 73.07 140.46 ▂▇▃▁▁
TG_nz.b 0 1 103.59 82.98 20.62 57.83 82.07 120.17 1450.27 ▇▁▁▁▁
HbA1c_nz.b 0 1 5.43 0.47 4.09 5.15 5.37 5.62 12.17 ▇▂▁▁▁
weight_nz.f 0 1 64.45 12.36 31.64 55.44 64.08 71.84 141.43 ▂▇▂▁▁
systolicBP_nz.f 0 1 119.05 15.35 77.58 108.21 118.54 129.03 204.01 ▂▇▃▁▁
diastolicBP_nz.f 0 1 74.32 11.39 41.49 66.35 73.70 81.85 119.35 ▁▇▇▂▁
ldl_c_nz.f 0 1 122.32 30.75 28.81 101.02 119.97 141.47 279.04 ▁▇▅▁▁
hdl_c_nz.f 0 1 64.46 16.63 25.77 52.39 62.23 74.44 151.11 ▃▇▃▁▁
TG_nz.f 0 1 103.34 82.45 19.96 57.97 81.88 123.17 1544.59 ▇▁▁▁▁
HbA1c_nz.f 0 1 5.46 0.49 4.05 5.18 5.41 5.65 12.56 ▇▂▁▁▁
colnames(data3)
##  [1] "仮個人id"                  "gender"                   
##  [3] "age"                       "kencom登録年月"           
##  [5] "step.diff.quintile"        "step.diff.median.by.group"
##  [7] "weight_nz.diff"            "systolicBP_nz.diff"       
##  [9] "diastolicBP_nz.diff"       "ldl_c_nz.diff"            
## [11] "hdl_c_nz.diff"             "TG_nz.diff"               
## [13] "HbA1c_nz.diff"             "step_mean.diff"           
## [15] "baseline健診受診年月"      "followup健診受診年月"     
## [17] "weight_nz.b"               "systolicBP_nz.b"          
## [19] "diastolicBP_nz.b"          "ldl_c_nz.b"               
## [21] "hdl_c_nz.b"                "TG_nz.b"                  
## [23] "HbA1c_nz.b"                "weight_nz.f"              
## [25] "systolicBP_nz.f"           "diastolicBP_nz.f"         
## [27] "ldl_c_nz.f"                "hdl_c_nz.f"               
## [29] "TG_nz.f"                   "HbA1c_nz.f"

#data3の確認

ggplot(data3,mapping = aes(x=step.diff.quintile,fill=gender))+
geom_bar()

hist(data3$age)

ggplot(data3, aes(x=age, fill=gender)) +
  geom_histogram(binwidth=1, position="identity",alpha=0.5) +
  scale_fill_brewer(palette="Set1") +
  labs(title="age", x="age", y="count") +
  theme_minimal()

hist(data3$step_mean.diff)

ggplot(data3,mapping = aes(x=weight_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=systolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=diastolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=ldl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=hdl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=TG_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=HbA1c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

step_mean.diff=(step_mean.f - step_mean.b) HbA1c_nz.diff=(HbA1c_nz.f - HbA1c_nz.b)

ggplot(data3,mapping = aes(x=step_mean.diff,y=weight_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=systolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=diastolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=ldl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=hdl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=TG_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=HbA1c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

data3_longer<- data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "value"
  )
ggplot(data3_longer, aes(x = step_mean.diff, y = value, color = gender)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~measure_type, scales = "free") +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

data3 %>% group_by(gender,"HbA1c_nz.diff") %>% 
  summarise(mean_Hb=mean(HbA1c_nz.diff)) %>% 
  ggplot(mapping = aes(x=gender,y=mean_Hb,fill=gender))+
  geom_bar(stat="identity",position="dodge")+
  xlab("Gender") +ylab("HbA1c_diff")+
  theme_bw()
## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.

t.test(HbA1c_nz.diff~gender,data=data3)
## 
##  Welch Two Sample t-test
## 
## data:  HbA1c_nz.diff by gender
## t = -0.55778, df = 5003.3, p-value = 0.577
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
##  -0.02555737  0.01423556
## sample estimates:
## mean in group female   mean in group male 
##           0.03397754           0.03963844

#ヒストグラム作成

#step.diff.quintileごとの平均年齢
data3 %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_age=mean(age,na.rm=T)) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_age))+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Mean Age")+
  theme_bw()

#step.diff.quintileごとにおけるベースライン時と健診時の体重差

#step.diff.quintileごとにおけるベースライン時と健診時の体重差
data3 %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_weight=mean(weight_nz.diff,na.rm=T),
                             sd_weight=sd(weight_nz.diff,na.rm=T),
                             se_weight=sd_weight / sqrt(n())) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_weight))+
  geom_errorbar(aes(ymin=mean_weight-se_weight,ymax=mean_weight+se_weight),width=0.3)+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Changing Weight(Mean Weight)")+
  theme_bw()

data3 %>% 
  group_by(step.diff.quintile, gender) %>% 
  summarise(mean_weight=mean(weight_nz.diff, na.rm=TRUE),
            sd_weight=sd(weight_nz.diff, na.rm=TRUE),
            se_weight=sd_weight / sqrt(n())) %>% 
  ggplot(mapping = aes(x=step.diff.quintile, y=mean_weight, fill=gender)) +
  geom_errorbar(aes(ymin=mean_weight-se_weight, ymax=mean_weight+se_weight), width=0.3,    
  position=position_dodge(width=0.5)) +
  geom_bar(stat="identity", position=position_dodge(width=0.5)) +
  xlab("Step Difference Quintile") +
  ylab("Changing Weight (Mean Weight)") +
  theme_bw() +
  scale_fill_manual(values = c("male" = "blue", "female" = "pink")) # ここで性別ごとの色を指定
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

歩数変化量と健診検査値変化量のグラフ

colnames(data3)
##  [1] "仮個人id"                  "gender"                   
##  [3] "age"                       "kencom登録年月"           
##  [5] "step.diff.quintile"        "step.diff.median.by.group"
##  [7] "weight_nz.diff"            "systolicBP_nz.diff"       
##  [9] "diastolicBP_nz.diff"       "ldl_c_nz.diff"            
## [11] "hdl_c_nz.diff"             "TG_nz.diff"               
## [13] "HbA1c_nz.diff"             "step_mean.diff"           
## [15] "baseline健診受診年月"      "followup健診受診年月"     
## [17] "weight_nz.b"               "systolicBP_nz.b"          
## [19] "diastolicBP_nz.b"          "ldl_c_nz.b"               
## [21] "hdl_c_nz.b"                "TG_nz.b"                  
## [23] "HbA1c_nz.b"                "weight_nz.f"              
## [25] "systolicBP_nz.f"           "diastolicBP_nz.f"         
## [27] "ldl_c_nz.f"                "hdl_c_nz.f"               
## [29] "TG_nz.f"                   "HbA1c_nz.f"
data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type, gender) %>%
  summarise(mean = mean(diff_value, na.rm = TRUE),
            sd = sd(diff_value, na.rm = TRUE),
            se = sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y = mean, fill = gender)) +
  geom_bar(stat = "identity", position = position_dodge()) +  
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
  facet_wrap(~measure_type, scales = "free") +
  scale_fill_manual(values = c("male" = "blue", "female" = "red")) +  
  theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile', 'measure_type'. You
## can override using the `.groups` argument.

分析2-2

健診検査値変化量 = Β x step.diff.median.by.group+切片(intercept)

data3 %>% head()
##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6   male  66     2016-04-01                  1
## 2 1OwsKnMNaE   male  39     2016-04-01                  3
## 3 0IliCoWCZV   male  31     2016-04-01                  3
## 4 M0tEnZHNql female  51     2016-04-01                  2
## 5 PDnTXele4g   male  26     2016-04-01                  5
## 6 AecvtKe8cE   male  34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -963.51562     -4.1390000           1.204490
## 2                  62.69344     -3.1120477           5.615931
## 3                  62.69344      0.7284926          -2.445457
## 4                -327.36152      4.0614129          24.935567
## 5                1596.78921     -2.6122737          10.506455
## 6                -327.36152     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363

##step.diff.median.by.groupについて

unique(data3$step.diff.median.by.group)
## [1] -963.51562   62.69344 -327.36152 1596.78921  516.39142
n_distinct(data3$step.diff.median.by.group)
## [1] 5
summary(data3$step.diff.median.by.group)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -963.52 -327.36   62.69  177.03  516.39 1596.79
str(data3$step.diff.median.by.group)
##  num [1:5473] -963.5 62.7 62.7 -327.4 1596.8 ...
data4<-data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) 
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
data4 %>% head()
## # A tibble: 6 × 5
## # Groups:   step.diff.median.by.group [1]
##   step.diff.median.by.group measure_type          mean     sd     se
##                       <dbl> <chr>                <dbl>  <dbl>  <dbl>
## 1                     -964. HbA1c_nz.diff       0.0488  0.426 0.0129
## 2                     -964. TG_nz.diff          3.25   83.9   2.54  
## 3                     -964. diastolicBP_nz.diff 1.39    9.12  0.276 
## 4                     -964. hdl_c_nz.diff       0.196   9.42  0.285 
## 5                     -964. ldl_c_nz.diff       3.10   23.6   0.712 
## 6                     -964. systolicBP_nz.diff  1.78   12.9   0.389
#dBP
DBP<-
  data4 %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP<-
  data4 %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)
#weight
weight<-
  data4 %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c<-
  data4 %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG<-
  data4 %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)
#HDL
HDL<-
  data4 %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL<-
  data4 %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)
data5<-bind_rows(DBP,SBP,weight,HbA1c,TG,HDL,LDL)
data5 
##                                      term      Estimate     p_value
## step.diff.median.by.group...1    dBP_diff -2.890070e-04 0.106801745
## step.diff.median.by.group...2    sBP_diff -4.237653e-04 0.022124846
## step.diff.median.by.group...3 weight_diff -3.551869e-04 0.003169958
## step.diff.median.by.group...4  HbA1c_diff -1.468281e-05 0.020568124
## step.diff.median.by.group...5     TG_diff -2.401945e-03 0.279411725
## step.diff.median.by.group...6    HDL_diff  7.970896e-04 0.007896832
## step.diff.median.by.group...7    LDL_diff -5.979974e-04 0.288273409

全体のsBP,weight,HbA1c,HDLについては有意差あり

data5 %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*","")) 
##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -2.890070e-04 0.106801745       
## step.diff.median.by.group...2    sBP_diff -4.237653e-04 0.022124846      *
## step.diff.median.by.group...3 weight_diff -3.551869e-04 0.003169958      *
## step.diff.median.by.group...4  HbA1c_diff -1.468281e-05 0.020568124      *
## step.diff.median.by.group...5     TG_diff -2.401945e-03 0.279411725       
## step.diff.median.by.group...6    HDL_diff  7.970896e-04 0.007896832      *
## step.diff.median.by.group...7    LDL_diff -5.979974e-04 0.288273409

#step.diff.median.by.groupを5つに分類して各検査値の変化量を比較

data6 <- data3 %>% 
  mutate(
   step.diff.median.group = round(step.diff.median.by.group, 3),
   step.diff.category = case_when(
   step.diff.median.group == round(-963.51562, 3) ~ 1,
   step.diff.median.group == round(-327.36152, 3) ~ 2,
   step.diff.median.group == round(62.69344, 3) ~ 3,
   step.diff.median.group == round(516.39142, 3) ~ 4,
   step.diff.median.group == round(1596.78921, 3) ~ 5,
   TRUE ~ NA                     # どの条件にも当てはまらない場合、NAを割り当てる
    )
  )
data6 %>% head()
##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6   male  66     2016-04-01                  1
## 2 1OwsKnMNaE   male  39     2016-04-01                  3
## 3 0IliCoWCZV   male  31     2016-04-01                  3
## 4 M0tEnZHNql female  51     2016-04-01                  2
## 5 PDnTXele4g   male  26     2016-04-01                  5
## 6 AecvtKe8cE   male  34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -963.51562     -4.1390000           1.204490
## 2                  62.69344     -3.1120477           5.615931
## 3                  62.69344      0.7284926          -2.445457
## 4                -327.36152      4.0614129          24.935567
## 5                1596.78921     -2.6122737          10.506455
## 6                -327.36152     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f step.diff.median.group step.diff.category
## 1   5.143625               -963.516                  1
## 2   6.222963                 62.693                  3
## 3   5.269366                 62.693                  3
## 4   5.593088               -327.362                  2
## 5   5.208233               1596.789                  5
## 6   5.129363               -327.362                  2
data6 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.category, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.category, y =mean)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()
## `summarise()` has grouped output by 'step.diff.category'. You can override
## using the `.groups` argument.

data6 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.category, measure_type, gender) %>%  # 追加: gender
  summarise(mean = mean(diff_value, na.rm = TRUE),
            sd = sd(diff_value, na.rm = TRUE),
            se = sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.category, y = mean, fill = gender)) + 
  geom_bar(stat = "identity", position = position_dodge()) +  
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
  facet_wrap(~measure_type, scales = "free") +
  scale_fill_manual(values = c("male" = "blue", "female" = "red")) +  
  theme_classic()
## `summarise()` has grouped output by 'step.diff.category', 'measure_type'. You
## can override using the `.groups` argument.

#write_csv(data6, "/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/data6.csv")

分析追加

性差での違いを調べる

data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type, gender) %>%
  summarise(mean = mean(diff_value, na.rm = TRUE),
            sd = sd(diff_value, na.rm = TRUE),
            se = sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y = mean, fill = gender)) +
  geom_bar(stat = "identity", position = position_dodge()) +  
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
  facet_wrap(~measure_type, scales = "free") +
  scale_fill_manual(values = c("male" = "blue", "female" = "red")) +  
  theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile', 'measure_type'. You
## can override using the `.groups` argument.

男性データのみを抽出

data_male<-data3 %>% filter(gender=="male")
df_male1<-data_male %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) 
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
#dBP
DBP_male<-
  df_male1 %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_male<-
  df_male1 %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)
#weight
weight_male<-
  df_male1 %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_male<-
  df_male1 %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_male<-
  df_male1 %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_male<-
  df_male1 %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_male<-
  df_male1 %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)
df5_male<-bind_rows(DBP_male,SBP_male,weight_male,HbA1c_male,TG_male,HDL_male,LDL_male)
df5_male 
##                                      term      Estimate     p_value
## step.diff.median.by.group...1    dBP_diff -7.193161e-04 0.034426241
## step.diff.median.by.group...2    sBP_diff -6.272823e-04 0.116603247
## step.diff.median.by.group...3 weight_diff -3.520797e-04 0.019218203
## step.diff.median.by.group...4  HbA1c_diff -1.805442e-05 0.114183801
## step.diff.median.by.group...5     TG_diff -2.843390e-03 0.192499844
## step.diff.median.by.group...6    HDL_diff  8.258661e-04 0.008869557
## step.diff.median.by.group...7    LDL_diff -2.149112e-04 0.719694014

男性データではdBP_diff、weight_diff、HDL_diffについて有意差あり

df5_male %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*","")) 
##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -7.193161e-04 0.034426241      *
## step.diff.median.by.group...2    sBP_diff -6.272823e-04 0.116603247       
## step.diff.median.by.group...3 weight_diff -3.520797e-04 0.019218203      *
## step.diff.median.by.group...4  HbA1c_diff -1.805442e-05 0.114183801       
## step.diff.median.by.group...5     TG_diff -2.843390e-03 0.192499844       
## step.diff.median.by.group...6    HDL_diff  8.258661e-04 0.008869557      *
## step.diff.median.by.group...7    LDL_diff -2.149112e-04 0.719694014

女性データのみを抽出

data_female<-data3 %>% filter(gender=="female")
df_female1<-data_female %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) 
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
#dBP
DBP_female<-
  df_female1 %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_female<-
  df_female1 %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)
#weight
weight_female<-
  df_female1 %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_female<-
  df_female1 %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_female<-
  df_female1 %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_female<-
  df_female1 %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_female<-
  df_female1 %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)
df5_female<-bind_rows(DBP_female,SBP_female,weight_female,HbA1c_female,TG_female,HDL_female,LDL_female)
df5_female 
##                                      term      Estimate    p_value
## step.diff.median.by.group...1    dBP_diff  3.640916e-04 0.03953484
## step.diff.median.by.group...2    sBP_diff -1.192206e-04 0.77770724
## step.diff.median.by.group...3 weight_diff -3.569323e-04 0.01954224
## step.diff.median.by.group...4  HbA1c_diff -9.891568e-06 0.18287912
## step.diff.median.by.group...5     TG_diff -1.700495e-03 0.52461853
## step.diff.median.by.group...6    HDL_diff  7.601839e-04 0.01828471
## step.diff.median.by.group...7    LDL_diff -1.163230e-03 0.09320509

女性データではdBP_diff、weight_diff、HDL_diffについて有意差あり

df5_female %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*","")) 
##                                      term      Estimate    p_value p<0.05
## step.diff.median.by.group...1    dBP_diff  3.640916e-04 0.03953484      *
## step.diff.median.by.group...2    sBP_diff -1.192206e-04 0.77770724       
## step.diff.median.by.group...3 weight_diff -3.569323e-04 0.01954224      *
## step.diff.median.by.group...4  HbA1c_diff -9.891568e-06 0.18287912       
## step.diff.median.by.group...5     TG_diff -1.700495e-03 0.52461853       
## step.diff.median.by.group...6    HDL_diff  7.601839e-04 0.01828471      *
## step.diff.median.by.group...7    LDL_diff -1.163230e-03 0.09320509

心血管リスク因子(HT,DL,DM)一つでも該当する者を対象とする

HT=sBP130以上orDL=LDL:140以上orHDL:40未満orTG:175以上orDM=6.5

data3_highrisk<-data3 %>% 
  filter(systolicBP_nz.b>=130 | ldl_c_nz.b>=140 | hdl_c_nz.b<40 | TG_nz.b>=175 | HbA1c_nz.b>=6.5) 
data3_highrisk %>% nrow()
## [1] 2449
data3_highrisk %>% skim()
Data summary
Name Piped data
Number of rows 2449
Number of columns 30
_______________________
Column type frequency:
character 5
numeric 25
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
仮個人id 0 1 10 10 0 2449 0
gender 0 1 4 6 0 2 0
kencom登録年月 0 1 10 10 0 24 0
baseline健診受診年月 0 1 10 10 0 30 0
followup健診受診年月 0 1 10 10 0 23 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 44.97 11.06 18.00 37.00 46.00 53.00 71.00 ▂▆▇▇▂
step.diff.quintile 0 1 3.01 1.42 1.00 2.00 3.00 4.00 5.00 ▇▇▇▇▇
step.diff.median.by.group 0 1 182.03 862.23 -963.52 -327.36 62.69 516.39 1596.79 ▅▅▇▁▅
weight_nz.diff 0 1 0.08 4.39 -31.13 -2.38 0.20 2.76 19.59 ▁▁▅▇▁
systolicBP_nz.diff 0 1 -1.36 14.36 -59.82 -10.51 -1.26 8.19 52.65 ▁▂▇▃▁
diastolicBP_nz.diff 0 1 0.28 10.04 -37.00 -6.16 0.51 6.96 45.97 ▁▅▇▂▁
ldl_c_nz.diff 0 1 -2.53 26.60 -143.84 -16.50 -1.30 13.35 98.39 ▁▁▇▇▁
hdl_c_nz.diff 0 1 1.18 8.91 -36.25 -4.26 0.93 6.33 52.75 ▁▆▇▁▁
TG_nz.diff 0 1 -7.55 99.27 -1279.42 -30.67 -3.42 22.42 1146.49 ▁▁▇▁▁
HbA1c_nz.diff 0 1 0.06 0.45 -4.51 -0.16 0.06 0.27 5.93 ▁▁▇▁▁
step_mean.diff 0 1 248.10 1328.27 -4286.43 -442.90 68.12 687.88 10651.17 ▁▇▁▁▁
weight_nz.b 0 1 68.45 12.38 37.62 60.23 68.06 75.84 125.84 ▂▇▅▁▁
systolicBP_nz.b 0 1 126.01 16.05 81.63 114.44 127.11 136.04 209.34 ▂▇▅▁▁
diastolicBP_nz.b 0 1 78.32 11.65 44.97 70.29 78.13 86.02 126.16 ▁▇▇▂▁
ldl_c_nz.b 0 1 138.83 31.24 30.71 116.48 142.68 158.84 276.22 ▁▅▇▁▁
hdl_c_nz.b 0 1 59.09 16.18 24.59 47.49 56.80 68.42 140.46 ▃▇▂▁▁
TG_nz.b 0 1 139.89 108.49 23.75 76.56 110.50 169.93 1450.27 ▇▁▁▁▁
HbA1c_nz.b 0 1 5.54 0.59 4.11 5.21 5.45 5.72 12.17 ▇▂▁▁▁
weight_nz.f 0 1 68.53 12.35 38.57 60.55 67.86 75.87 124.70 ▂▇▅▁▁
systolicBP_nz.f 0 1 124.66 15.29 82.37 114.66 123.93 133.82 204.01 ▂▇▅▁▁
diastolicBP_nz.f 0 1 78.60 11.18 41.49 71.04 78.39 86.03 119.35 ▁▅▇▃▁
ldl_c_nz.f 0 1 136.30 32.06 44.80 114.14 135.71 156.69 279.04 ▂▇▇▁▁
hdl_c_nz.f 0 1 60.27 16.66 25.77 48.25 57.54 69.14 150.95 ▅▇▂▁▁
TG_nz.f 0 1 132.34 104.76 20.33 73.02 108.35 158.94 1544.59 ▇▁▁▁▁
HbA1c_nz.f 0 1 5.60 0.60 4.32 5.26 5.51 5.77 12.56 ▇▁▁▁▁
data3_highrisk %>% head()
##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0IliCoWCZV   male  31     2016-04-01                  3
## 2 PDnTXele4g   male  26     2016-04-01                  5
## 3 EPbHvE0cSI female  22     2016-05-01                  2
## 4 qBqHi7ri2W   male  42     2016-04-01                  2
## 5 TRZYorNdbw female  32     2016-05-01                  3
## 6 ULSKJQhwhT   male  25     2016-06-01                  1
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                  62.69344      0.7284926          -2.445457
## 2                1596.78921     -2.6122737          10.506455
## 3                -327.36152     -4.4093780           2.458827
## 4                -327.36152     -2.2889677           8.490237
## 5                  62.69344      8.3624345         -16.204654
## 6                -963.51562      3.7261152          28.878772
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           -2.542741    -22.601922      4.948159  -2.196534   -0.06833491
## 2            8.258655    -18.511202    -18.558318 104.989170   -0.08458658
## 3           -4.471324    -41.671288    -11.151447  25.578212    0.36740637
## 4           10.265643      4.338429     -8.284412  20.048275    0.36238565
## 5           -5.522645    -20.854080    -12.814424  74.910630   -0.28642599
## 6            3.430650      7.664912      6.415059  -2.612318   -0.06264475
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1       143.7200           2015-05-01           2017-05-01    71.95757
## 2      1174.5297           2015-05-01           2017-05-01    70.95344
## 3      -466.6492           2015-05-01           2017-06-01    70.90351
## 4      -490.7139           2015-05-01           2017-06-01    70.25011
## 5       129.7318           2015-06-01           2017-06-01    71.35653
## 6      -635.0747           2015-06-01           2017-06-01    57.70602
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1       133.84588         78.75107   118.8808   58.35234  67.63990   5.337701
## 2       101.71822         56.37320   155.5491   63.88320  72.98456   5.292819
## 3       115.42029         73.12555   165.4906   48.76714 111.67693   5.506758
## 4        93.26939         55.46555   152.4000   51.88232  99.73815   4.618830
## 5       112.29083         57.39880   142.6868   58.44698  77.56937   5.520969
## 6       139.42054         76.05343   127.1050   66.96992  59.32766   5.753736
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    72.68606       131.40042         76.20833   96.27893   63.30050  65.44337
## 2    68.34116       112.22468         64.63186  137.03787   45.32488 177.97374
## 3    66.49414       117.87911         68.65423  123.81931   37.61569 137.25515
## 4    67.96114       101.75963         65.73120  156.73848   43.59791 119.78642
## 5    79.71897        96.08617         51.87616  121.83272   45.63256 152.48000
## 6    61.43213       168.29932         79.48408  134.76990   73.38498  56.71534
##   HbA1c_nz.f
## 1   5.269366
## 2   5.208233
## 3   5.874164
## 4   4.981216
## 5   5.234543
## 6   5.691091
ggplot(data3_highrisk,mapping = aes(x=age,fill=gender))+
  geom_histogram(binwidth=5, position="identity",alpha=0.5) +
  facet_wrap(~gender,scales = "free")+
  theme_bw()

data3_highrisk %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data4_highrisk<-data3_highrisk %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) 
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
data4_highrisk %>% head()
## # A tibble: 6 × 5
## # Groups:   step.diff.median.by.group [1]
##   step.diff.median.by.group measure_type           mean      sd     se
##                       <dbl> <chr>                 <dbl>   <dbl>  <dbl>
## 1                     -964. HbA1c_nz.diff        0.0698   0.538 0.0244
## 2                     -964. TG_nz.diff          -6.20   110.    5.01  
## 3                     -964. diastolicBP_nz.diff  0.969    9.61  0.436 
## 4                     -964. hdl_c_nz.diff        0.266    8.90  0.404 
## 5                     -964. ldl_c_nz.diff       -0.860   27.0   1.23  
## 6                     -964. systolicBP_nz.diff   0.146   14.3   0.650
#dBP
DBP_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>%
  filter(row.names(.)=="step.diff.median.by.group") %>%
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
  mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)
#weight
weight_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)
data5_highrisk<-bind_rows(DBP_highrisk,SBP_highrisk,weight_highrisk,HbA1c_highrisk,TG_highrisk,HDL_highrisk,LDL_highrisk)
data5_highrisk 
##                                      term      Estimate     p_value
## step.diff.median.by.group...1    dBP_diff -6.197701e-04 0.019402126
## step.diff.median.by.group...2    sBP_diff -9.449150e-04 0.018892579
## step.diff.median.by.group...3 weight_diff -5.140357e-04 0.000219201
## step.diff.median.by.group...4  HbA1c_diff -1.941723e-05 0.015689553
## step.diff.median.by.group...5     TG_diff -1.804097e-03 0.589938244
## step.diff.median.by.group...6    HDL_diff  9.991996e-04 0.013094852
## step.diff.median.by.group...7    LDL_diff -5.655664e-04 0.444863251

心血管リスク因子を一つでも持つ者のdBP,sBP,weight,HbA1c,HDLについては有意差あり

data5_highrisk %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*","")) 
##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -6.197701e-04 0.019402126      *
## step.diff.median.by.group...2    sBP_diff -9.449150e-04 0.018892579      *
## step.diff.median.by.group...3 weight_diff -5.140357e-04 0.000219201      *
## step.diff.median.by.group...4  HbA1c_diff -1.941723e-05 0.015689553      *
## step.diff.median.by.group...5     TG_diff -1.804097e-03 0.589938244       
## step.diff.median.by.group...6    HDL_diff  9.991996e-04 0.013094852      *
## step.diff.median.by.group...7    LDL_diff -5.655664e-04 0.444863251

data3でベースライン時に心血管ハイリスク患者(HT,DL,DM:3つある)に限定して調べる

HT=sBP130以上、DL=LDL:140以上orHDL:40未満orTG:175以上、DM=6.5

data3_high<-data3 %>% 
  filter(systolicBP_nz.b>=130) %>% 
  filter(ldl_c_nz.b>=140 | hdl_c_nz.b<40 | TG_nz.b>=175) %>% 
  filter(HbA1c_nz.b>=6.5) 
data3_high %>% nrow()
## [1] 31
data3_high %>% skim()
Data summary
Name Piped data
Number of rows 31
Number of columns 30
_______________________
Column type frequency:
character 5
numeric 25
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
仮個人id 0 1 10 10 0 31 0
gender 0 1 4 6 0 2 0
kencom登録年月 0 1 10 10 0 9 0
baseline健診受診年月 0 1 10 10 0 12 0
followup健診受診年月 0 1 10 10 0 8 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 45.00 12.80 23.00 35.50 43.00 53.00 71.00 ▅▇▆▅▃
step.diff.quintile 0 1 3.00 1.71 1.00 1.00 3.00 5.00 5.00 ▇▃▂▃▇
step.diff.median.by.group 0 1 234.74 1073.62 -963.52 -963.52 62.69 1596.79 1596.79 ▇▃▆▁▇
weight_nz.diff 0 1 -2.81 4.15 -13.73 -4.92 -2.81 -0.24 6.22 ▁▅▇▇▂
systolicBP_nz.diff 0 1 -8.50 18.52 -51.36 -15.55 -9.64 5.73 25.19 ▂▂▇▅▂
diastolicBP_nz.diff 0 1 -6.37 10.96 -24.31 -14.70 -4.82 0.37 14.28 ▅▆▇▃▅
ldl_c_nz.diff 0 1 -9.39 51.50 -143.84 -25.34 -4.79 18.95 88.81 ▁▂▇▇▂
hdl_c_nz.diff 0 1 2.26 9.68 -20.54 -2.14 2.78 7.66 24.60 ▂▂▇▃▁
TG_nz.diff 0 1 -32.13 269.77 -504.79 -114.13 -28.89 9.30 1146.49 ▂▇▁▁▁
HbA1c_nz.diff 0 1 -0.06 2.04 -4.51 -1.02 0.08 0.44 5.93 ▂▃▇▁▁
step_mean.diff 0 1 164.75 1371.49 -2638.22 -637.24 87.20 1128.13 3017.17 ▂▇▅▆▂
weight_nz.b 0 1 82.01 15.90 59.30 70.81 80.98 88.85 120.46 ▇▅▆▂▂
systolicBP_nz.b 0 1 144.86 15.84 130.98 134.71 140.20 146.54 192.79 ▇▃▁▁▁
diastolicBP_nz.b 0 1 89.44 9.31 68.94 85.10 89.16 91.76 111.44 ▂▂▇▁▂
ldl_c_nz.b 0 1 137.93 33.37 67.33 109.47 141.31 162.84 201.55 ▂▅▇▅▃
hdl_c_nz.b 0 1 51.16 14.73 32.33 41.78 45.88 57.57 89.24 ▇▇▃▂▂
TG_nz.b 0 1 240.11 157.52 67.15 126.42 183.14 292.91 611.58 ▇▅▁▁▂
HbA1c_nz.b 0 1 7.62 1.42 6.53 6.72 6.97 7.80 12.17 ▇▂▁▁▁
weight_nz.f 0 1 79.20 14.76 59.26 69.45 78.01 87.70 114.79 ▇▇▇▁▂
systolicBP_nz.f 0 1 136.36 14.70 106.65 126.08 136.07 146.41 167.78 ▂▇▇▆▃
diastolicBP_nz.f 0 1 83.06 9.38 61.77 77.04 82.42 89.67 100.17 ▂▃▇▆▅
ldl_c_nz.f 0 1 128.55 36.40 53.48 102.15 131.36 156.85 204.44 ▂▇▇▇▂
hdl_c_nz.f 0 1 53.41 13.46 25.77 45.65 51.13 60.04 92.02 ▂▇▅▂▁
TG_nz.f 0 1 207.98 253.93 70.63 109.11 136.71 199.15 1483.99 ▇▁▁▁▁
HbA1c_nz.f 0 1 7.55 1.45 5.61 6.60 7.10 8.03 12.56 ▇▇▁▁▁
ggplot(data3_high,mapping = aes(x=age,fill=gender))+
  geom_histogram(binwidth=5, position="identity",alpha=0.5) +
  facet_wrap(~gender,scales = "free")+
  theme_bw()

data3_high %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.