データ読み込み

pacman::p_load(tidyverse, lubridate, tableone,
               survival, survminer, car, svglite,missForest,naniar)
 user.data_8.detect <- read.csv("/Users/abekohei/Desktop/HDS演習_DeSC/HDS_DeSC/user.data_8.欠測値補完用.csv")
colnames(user.data_8.detect)
##  [1] "ID"                     "gender"                 "age"                   
##  [4] "kencom登録年月"         "資格取得年月"           "資格喪失年月"          
##  [7] "レセプトデータ開始年月" "レセプトデータ終了年月" "baseline健診受診年月"  
## [10] "weight_nz.b"            "systolicBP_nz.b"        "diastolicBP_nz.b"      
## [13] "ldl_c_nz.b"             "hdl_c_nz.b"             "TG_nz.b"               
## [16] "HbA1c_nz.b"             "データ有り月数_total"   "データ有り月数_pre"    
## [19] "データ有り月数_post"    "月ごとの平均データ数"   "step_mean_pre"         
## [22] "step_mean_post"         "step_sd_pre"            "step_sd_post"          
## [25] "step_mean_pre.wd"       "step_mean_post.wd"      "step_sd_pre.wd"        
## [28] "step_sd_post.wd"        "step_mean_pre.we"       "step_mean_post.we"     
## [31] "step_sd_pre.we"         "step_sd_post.we"        "step.diff"             
## [34] "step.diff.wd"           "step.diff.we"           "weight_nz.f"           
## [37] "systolicBP_nz.f"        "diastolicBP_nz.f"       "ldl_c_nz.f"            
## [40] "hdl_c_nz.f"             "TG_nz.f"                "HbA1c_nz.f"            
## [43] "followup健診受診年月"   "weight_nz.diff"         "systolicBP_nz.diff"    
## [46] "diastolicBP_nz.diff"    "ldl_c_nz.diff"          "hdl_c_nz.diff"         
## [49] "TG_nz.diff"             "HbA1c_nz.diff"

欠測値の確認

is.na(user.data_8.detect) %>% colSums()
##                     ID                 gender                    age 
##                      0                      0                      0 
##         kencom登録年月           資格取得年月           資格喪失年月 
##                      0                      0                      0 
## レセプトデータ開始年月 レセプトデータ終了年月   baseline健診受診年月 
##                      0                      0                      0 
##            weight_nz.b        systolicBP_nz.b       diastolicBP_nz.b 
##                      4                      4                      4 
##             ldl_c_nz.b             hdl_c_nz.b                TG_nz.b 
##                     24                     23                     23 
##             HbA1c_nz.b   データ有り月数_total     データ有り月数_pre 
##                    175                      0                      0 
##    データ有り月数_post   月ごとの平均データ数          step_mean_pre 
##                      0                      0                      0 
##         step_mean_post            step_sd_pre           step_sd_post 
##                      0                      0                      0 
##       step_mean_pre.wd      step_mean_post.wd         step_sd_pre.wd 
##                      0                      0                      0 
##        step_sd_post.wd       step_mean_pre.we      step_mean_post.we 
##                      0                      0                      0 
##         step_sd_pre.we        step_sd_post.we              step.diff 
##                      0                      0                      0 
##           step.diff.wd           step.diff.we            weight_nz.f 
##                      0                      0                      2 
##        systolicBP_nz.f       diastolicBP_nz.f             ldl_c_nz.f 
##                      1                      1                      9 
##             hdl_c_nz.f                TG_nz.f             HbA1c_nz.f 
##                      8                      8                    104 
##   followup健診受診年月         weight_nz.diff     systolicBP_nz.diff 
##                      0                      6                      5 
##    diastolicBP_nz.diff          ldl_c_nz.diff          hdl_c_nz.diff 
##                      5                     26                     24 
##             TG_nz.diff          HbA1c_nz.diff 
##                     24                    203
gg_miss_var(user.data_8.detect,show_pct=TRUE)

ggplot(user.data_8.detect,aes(x=age,y=HbA1c_nz.b))+
  geom_miss_point()

ggplot(user.data_8.detect,aes(x=age,y=HbA1c_nz.f))+
  geom_miss_point()

ggplot(user.data_8.detect,aes(x=age,y=ldl_c_nz.f))+
  geom_miss_point()

データ型の確認(missForest関数はcharacter型を含むデータは不可能のため。factor型、numeric型、integer型は可能)

sapply(user.data_8.detect, class)
##                     ID                 gender                    age 
##            "character"            "character"              "integer" 
##         kencom登録年月           資格取得年月           資格喪失年月 
##            "character"            "character"            "character" 
## レセプトデータ開始年月 レセプトデータ終了年月   baseline健診受診年月 
##            "character"            "character"            "character" 
##            weight_nz.b        systolicBP_nz.b       diastolicBP_nz.b 
##              "numeric"              "numeric"              "numeric" 
##             ldl_c_nz.b             hdl_c_nz.b                TG_nz.b 
##              "numeric"              "numeric"              "numeric" 
##             HbA1c_nz.b   データ有り月数_total     データ有り月数_pre 
##              "numeric"              "integer"              "integer" 
##    データ有り月数_post   月ごとの平均データ数          step_mean_pre 
##              "integer"              "numeric"              "numeric" 
##         step_mean_post            step_sd_pre           step_sd_post 
##              "numeric"              "numeric"              "numeric" 
##       step_mean_pre.wd      step_mean_post.wd         step_sd_pre.wd 
##              "numeric"              "numeric"              "numeric" 
##        step_sd_post.wd       step_mean_pre.we      step_mean_post.we 
##              "numeric"              "numeric"              "numeric" 
##         step_sd_pre.we        step_sd_post.we              step.diff 
##              "numeric"              "numeric"              "numeric" 
##           step.diff.wd           step.diff.we            weight_nz.f 
##              "numeric"              "numeric"              "numeric" 
##        systolicBP_nz.f       diastolicBP_nz.f             ldl_c_nz.f 
##              "numeric"              "numeric"              "numeric" 
##             hdl_c_nz.f                TG_nz.f             HbA1c_nz.f 
##              "numeric"              "numeric"              "numeric" 
##   followup健診受診年月         weight_nz.diff     systolicBP_nz.diff 
##            "character"              "numeric"              "numeric" 
##    diastolicBP_nz.diff          ldl_c_nz.diff          hdl_c_nz.diff 
##              "numeric"              "numeric"              "numeric" 
##             TG_nz.diff          HbA1c_nz.diff 
##              "numeric"              "numeric"
#user.data_8.detect$ID<-as.factor(user.data_8.detect$ID) #IDは削除するので不要
user.data_8.detect$gender<-as.factor(user.data_8.detect$gender)
user.data_8.detect$age<-as.numeric(user.data_8.detect$age)
user.data_8.detect$データ有り月数_total<-as.numeric(user.data_8.detect$データ有り月数_total)
user.data_8.detect$データ有り月数_pre<-as.numeric(user.data_8.detect$データ有り月数_pre)
user.data_8.detect$データ有り月数_post<-as.numeric(user.data_8.detect$データ有り月数_post)

missForest対応していないデータは削除

user.data_8.detect_missF <-
  user.data_8.detect %>%
  ungroup() %>%  # グルーピングを解除
  select(-ID, -kencom登録年月, -baseline健診受診年月, -followup健診受診年月, 
         -資格取得年月, -資格喪失年月, -レセプトデータ開始年月, -レセプトデータ終了年月)

#IDは削除したくないため、取り出しておく。missForest関数の結果と後で統合するため。
id <- user.data_8.detect$ID
sapply(user.data_8.detect_missF, class)
##               gender                  age          weight_nz.b 
##             "factor"            "numeric"            "numeric" 
##      systolicBP_nz.b     diastolicBP_nz.b           ldl_c_nz.b 
##            "numeric"            "numeric"            "numeric" 
##           hdl_c_nz.b              TG_nz.b           HbA1c_nz.b 
##            "numeric"            "numeric"            "numeric" 
## データ有り月数_total   データ有り月数_pre  データ有り月数_post 
##            "numeric"            "numeric"            "numeric" 
## 月ごとの平均データ数        step_mean_pre       step_mean_post 
##            "numeric"            "numeric"            "numeric" 
##          step_sd_pre         step_sd_post     step_mean_pre.wd 
##            "numeric"            "numeric"            "numeric" 
##    step_mean_post.wd       step_sd_pre.wd      step_sd_post.wd 
##            "numeric"            "numeric"            "numeric" 
##     step_mean_pre.we    step_mean_post.we       step_sd_pre.we 
##            "numeric"            "numeric"            "numeric" 
##      step_sd_post.we            step.diff         step.diff.wd 
##            "numeric"            "numeric"            "numeric" 
##         step.diff.we          weight_nz.f      systolicBP_nz.f 
##            "numeric"            "numeric"            "numeric" 
##     diastolicBP_nz.f           ldl_c_nz.f           hdl_c_nz.f 
##            "numeric"            "numeric"            "numeric" 
##              TG_nz.f           HbA1c_nz.f       weight_nz.diff 
##            "numeric"            "numeric"            "numeric" 
##   systolicBP_nz.diff  diastolicBP_nz.diff        ldl_c_nz.diff 
##            "numeric"            "numeric"            "numeric" 
##        hdl_c_nz.diff           TG_nz.diff        HbA1c_nz.diff 
##            "numeric"            "numeric"            "numeric"

missForest実行

doParallelを使って並列処理をすると高速化するので、実際行う場合には並列化推奨。   ただし、注意すべき点がいくつかあるので、本コード内では安全性を考慮して並列処理は行わない。並列化しなくても本データは1~2時間で補完できる(はず)。

imp_missForest <-
  missForest(xmis = as.data.frame(user.data_8.detect_missF),
             maxiter = 30,
             ntree = 100
             )
imp_missForest$OOBerror
##       NRMSE         PFC 
## 0.004831176 0.000000000

結果の確認

user.data_9_hokan <- imp_missForest$ximp
user.data_9_hokan <- cbind(ID = id, imp_missForest$ximp) #IDを統合。IDは保持できている。
user.data_9_hokan <- 
  user.data_9_hokan %>% 
  inner_join(user.data_8.detect %>% select(ID,kencom登録年月, baseline健診受診年月, followup健診受診年月, 
                                         資格取得年月, 資格喪失年月, レセプトデータ開始年月, レセプトデータ終了年月), by = "ID")
user.data_9_hokan %>%
  write.csv(., "/Users/abekohei/Desktop/HDS演習_DeSC/HDS_DeSC/user.data_9.欠測値補完済.csv", row.names = FALSE)
data3_hokan <- read.csv("/Users/abekohei/Desktop/HDS演習_DeSC/HDS_DeSC/data3_exam.destructed_original_hokan.csv")

分析2-1

data3_hokan %>% head()
##     仮個人id 性別_sfl 年齢_sfl kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6     男性       66     2016-04-01                  1
## 2 1OwsKnMNaE     男性       39     2016-04-01                  3
## 3 0IliCoWCZV     男性       31     2016-04-01                  3
## 4 M0tEnZHNql     女性       51     2016-04-01                  2
## 5 PDnTXele4g     男性       26     2016-04-01                  5
## 6 AecvtKe8cE     男性       34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -961.92398     -4.1390000           1.204490
## 2                  61.99986     -3.1120477           5.615931
## 3                  61.99986      0.7284926          -2.445457
## 4                -327.63873      4.0614129          24.935567
## 5                1581.09765     -2.6122737          10.506455
## 6                -327.63873     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363
data3_hokan %>% skimr::skim()
Data summary
Name Piped data
Number of rows 5680
Number of columns 30
_______________________
Column type frequency:
character 5
numeric 25
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
仮個人id 0 1 10 10 0 5680 0
性別_sfl 0 1 2 2 0 2 0
kencom登録年月 0 1 10 10 0 24 0
baseline健診受診年月 0 1 10 10 0 32 0
followup健診受診年月 0 1 10 10 0 24 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
年齢_sfl 0 1 44.85 11.08 18.00 37.00 46.00 53.00 73.00 ▂▅▇▆▁
step.diff.quintile 0 1 2.99 1.42 1.00 2.00 3.00 4.00 5.00 ▇▇▇▇▇
step.diff.median.by.group 0 1 167.23 853.89 -961.92 -327.64 62.00 510.03 1581.10 ▅▃▇▁▃
weight_nz.diff 0 1 0.37 4.09 -31.13 -2.05 0.44 2.92 19.59 ▁▁▅▇▁
systolicBP_nz.diff 0 1 1.04 13.35 -59.82 -7.58 1.04 9.51 52.65 ▁▁▇▃▁
diastolicBP_nz.diff 0 1 1.02 9.60 -37.00 -5.21 0.99 7.20 45.97 ▁▃▇▂▁
ldl_c_nz.diff 0 1 1.47 22.48 -143.84 -10.55 1.73 14.31 139.08 ▁▁▇▁▁
hdl_c_nz.diff 0 1 1.12 9.21 -43.70 -4.56 0.83 6.40 52.75 ▁▂▇▁▁
TG_nz.diff 0 1 -0.01 72.32 -1279.42 -18.80 0.15 19.67 1146.49 ▁▁▇▁▁
HbA1c_nz.diff 0 1 0.04 0.37 -4.51 -0.17 0.04 0.24 5.93 ▁▁▇▁▁
step_mean.diff 0 1 209.38 1301.86 -6205.96 -442.92 58.09 660.11 12842.29 ▁▇▁▁▁
weight_nz.b 0 1 64.08 12.36 32.24 54.99 63.62 71.77 139.68 ▃▇▂▁▁
systolicBP_nz.b 0 1 117.88 15.13 75.98 107.22 117.10 127.38 209.34 ▂▇▂▁▁
diastolicBP_nz.b 0 1 73.18 11.51 41.28 64.99 72.28 80.81 126.16 ▂▇▆▁▁
ldl_c_nz.b 0 1 120.58 30.26 28.98 99.26 118.20 139.74 276.22 ▁▇▅▁▁
hdl_c_nz.b 0 1 63.26 16.04 24.59 51.51 61.31 72.91 140.46 ▂▇▃▁▁
TG_nz.b 0 1 102.81 82.10 20.62 57.46 81.49 119.74 1450.27 ▇▁▁▁▁
HbA1c_nz.b 0 1 5.42 0.47 4.09 5.15 5.36 5.61 12.17 ▇▂▁▁▁
weight_nz.f 0 1 64.45 12.35 31.64 55.43 64.10 71.80 141.43 ▂▇▂▁▁
systolicBP_nz.f 0 1 118.92 15.38 77.58 108.04 118.40 128.88 204.01 ▂▇▃▁▁
diastolicBP_nz.f 0 1 74.20 11.43 41.49 66.21 73.56 81.76 121.98 ▁▇▇▂▁
ldl_c_nz.f 0 1 122.04 30.77 28.81 100.75 119.66 141.05 279.04 ▁▇▅▁▁
hdl_c_nz.f 0 1 64.38 16.54 25.77 52.36 62.18 74.19 151.11 ▃▇▃▁▁
TG_nz.f 0 1 102.80 81.76 19.96 57.61 81.43 122.58 1544.59 ▇▁▁▁▁
HbA1c_nz.f 0 1 5.46 0.50 4.05 5.18 5.40 5.65 12.56 ▇▂▁▁▁
colnames(data3_hokan)
##  [1] "仮個人id"                  "性別_sfl"                 
##  [3] "年齢_sfl"                  "kencom登録年月"           
##  [5] "step.diff.quintile"        "step.diff.median.by.group"
##  [7] "weight_nz.diff"            "systolicBP_nz.diff"       
##  [9] "diastolicBP_nz.diff"       "ldl_c_nz.diff"            
## [11] "hdl_c_nz.diff"             "TG_nz.diff"               
## [13] "HbA1c_nz.diff"             "step_mean.diff"           
## [15] "baseline健診受診年月"      "followup健診受診年月"     
## [17] "weight_nz.b"               "systolicBP_nz.b"          
## [19] "diastolicBP_nz.b"          "ldl_c_nz.b"               
## [21] "hdl_c_nz.b"                "TG_nz.b"                  
## [23] "HbA1c_nz.b"                "weight_nz.f"              
## [25] "systolicBP_nz.f"           "diastolicBP_nz.f"         
## [27] "ldl_c_nz.f"                "hdl_c_nz.f"               
## [29] "TG_nz.f"                   "HbA1c_nz.f"
data3_hokan <- data3_hokan %>% 
  rename(gender = 性別_sfl, age = 年齢_sfl) %>%
  mutate(gender = if_else(gender == "男性", "male", "female"))

#data3の確認

ggplot(data3_hokan,mapping = aes(x=step.diff.quintile,fill=gender))+
geom_bar()

hist(data3_hokan$age)

ggplot(data3_hokan, aes(x=age, fill=gender)) +
  geom_histogram(binwidth=1, position="identity",alpha=0.5) +
  scale_fill_brewer(palette="Set1") +
  labs(title="age", x="age", y="count") +
  theme_minimal()

hist(data3_hokan$step_mean.diff)

ggplot(data3_hokan,mapping = aes(x=weight_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=systolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=diastolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=ldl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=hdl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=TG_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=HbA1c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=weight_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=systolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=diastolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=ldl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=hdl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=TG_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=HbA1c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

data3_hokan_longer<- data3_hokan %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "value"
  )
ggplot(data3_hokan_longer, aes(x = step_mean.diff, y = value, color = gender)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~measure_type, scales = "free") +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

data3_hokan %>% group_by(gender,"HbA1c_nz.diff") %>% 
  summarise(mean_Hb=mean(HbA1c_nz.diff)) %>% 
  ggplot(mapping = aes(x=gender,y=mean_Hb,fill=gender))+
  geom_bar(stat="identity",position="dodge")+
  xlab("Gender") +ylab("HbA1c_diff")+
  theme_bw()
## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.

t.test(HbA1c_nz.diff~gender,data=data3_hokan)
## 
##  Welch Two Sample t-test
## 
## data:  HbA1c_nz.diff by gender
## t = -0.50046, df = 5205.3, p-value = 0.6168
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
##  -0.02415687  0.01433144
## sample estimates:
## mean in group female   mean in group male 
##           0.03511848           0.04003119

#ヒストグラム作成

#step.diff.quintileごとの平均年齢
data3_hokan %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_age=mean(age,na.rm=T)) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_age))+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Mean Age")+
  theme_bw()

#step.diff.quintileごとにおけるベースライン時と健診時の体重差

#step.diff.quintileごとにおけるベースライン時と健診時の体重差
data3_hokan %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_weight=mean(weight_nz.diff,na.rm=T),
                             sd_weight=sd(weight_nz.diff,na.rm=T),
                             se_weight=sd_weight / sqrt(n())) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_weight))+
  geom_errorbar(aes(ymin=mean_weight-se_weight,ymax=mean_weight+se_weight),width=0.3)+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Changing Weight(Mean Weight)")+
  theme_bw()

data3_hokan %>% 
  group_by(step.diff.quintile, gender) %>% 
  summarise(mean_weight=mean(weight_nz.diff, na.rm=TRUE),
            sd_weight=sd(weight_nz.diff, na.rm=TRUE),
            se_weight=sd_weight / sqrt(n())) %>% 
  ggplot(mapping = aes(x=step.diff.quintile, y=mean_weight, fill=gender)) +
  geom_errorbar(aes(ymin=mean_weight-se_weight, ymax=mean_weight+se_weight), width=0.3,    
  position=position_dodge(width=0.5)) +
  geom_bar(stat="identity", position=position_dodge(width=0.5)) +
  xlab("Step Difference Quintile") +
  ylab("Changing Weight (Mean Weight)") +
  theme_bw() +
  scale_fill_manual(values = c("male" = "blue", "female" = "pink")) # ここで性別ごとの色を指定
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data3_hokan %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()
## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

分析2-2

健診検査値変化量 = Β x step.diff.median.by.group+切片(intercept)

data3_hokan %>% head()
##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6   male  66     2016-04-01                  1
## 2 1OwsKnMNaE   male  39     2016-04-01                  3
## 3 0IliCoWCZV   male  31     2016-04-01                  3
## 4 M0tEnZHNql female  51     2016-04-01                  2
## 5 PDnTXele4g   male  26     2016-04-01                  5
## 6 AecvtKe8cE   male  34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -961.92398     -4.1390000           1.204490
## 2                  61.99986     -3.1120477           5.615931
## 3                  61.99986      0.7284926          -2.445457
## 4                -327.63873      4.0614129          24.935567
## 5                1581.09765     -2.6122737          10.506455
## 6                -327.63873     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363

step.diff.median.by.groupについて

unique(data3_hokan$step.diff.median.by.group)
## [1] -961.92398   61.99986 -327.63873 1581.09765  510.03305
n_distinct(data3_hokan$step.diff.median.by.group)
## [1] 5
summary(data3_hokan$step.diff.median.by.group)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -961.9  -327.6    62.0   167.2   510.0  1581.1
str(data3_hokan$step.diff.median.by.group)
##  num [1:5680] -962 62 62 -328 1581 ...
data4_hokan<-data3_hokan %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) 
## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.
data4_hokan %>% head()
## # A tibble: 6 × 5
## # Groups:   step.diff.median.by.group [1]
##   step.diff.median.by.group measure_type          mean     sd     se
##                       <dbl> <chr>                <dbl>  <dbl>  <dbl>
## 1                     -962. HbA1c_nz.diff       0.0492  0.417 0.0123
## 2                     -962. TG_nz.diff          3.39   82.1   2.42  
## 3                     -962. diastolicBP_nz.diff 1.35    9.08  0.268 
## 4                     -962. hdl_c_nz.diff       0.156   9.32  0.275 
## 5                     -962. ldl_c_nz.diff       3.03   23.4   0.688 
## 6                     -962. systolicBP_nz.diff  1.69   12.7   0.375
#dBP
DBP_hokan<-
  data4_hokan %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)
#sBP
SBP_hokan<-
  data4_hokan %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)
#weight
weight_hokan<-
  data4_hokan %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)
#HbA1c
HbA1c_hokan<-
  data4_hokan %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)
#TG
TG_hokan<-
  data4_hokan %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)
#HDL
HDL_hokan<-
  data4_hokan %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)
#LDL
LDL_hokan<-
  data4_hokan %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)
data5_hokan<-bind_rows(DBP_hokan,SBP_hokan,weight_hokan,HbA1c_hokan,TG_hokan,HDL_hokan,LDL_hokan)

全体のsBP,weight,HbA1c,HDLについては有意差あり

data5_hokan %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*","")) 
##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -2.851582e-04 0.065543559       
## step.diff.median.by.group...2    sBP_diff -4.436297e-04 0.007693330      *
## step.diff.median.by.group...3 weight_diff -3.547857e-04 0.003060491      *
## step.diff.median.by.group...4  HbA1c_diff -1.516801e-05 0.023672830      *
## step.diff.median.by.group...5     TG_diff -2.229034e-03 0.319833692       
## step.diff.median.by.group...6    HDL_diff  8.257078e-04 0.007198414      *
## step.diff.median.by.group...7    LDL_diff -6.019312e-04 0.249781215