データ読み込み

pacman::p_load(tidyverse, lubridate, tableone,
               survival, survminer, car, svglite,missForest,naniar)

 user.data_8.detect <- read.csv("/Users/abekohei/Desktop/HDS演習＿DeSC/HDS_DeSC/user.data_8.欠測値補完用.csv")

colnames(user.data_8.detect)

##  [1] "ID"                     "gender"                 "age"                   
##  [4] "kencom登録年月"         "資格取得年月"           "資格喪失年月"          
##  [7] "レセプトデータ開始年月" "レセプトデータ終了年月" "baseline健診受診年月"  
## [10] "weight_nz.b"            "systolicBP_nz.b"        "diastolicBP_nz.b"      
## [13] "ldl_c_nz.b"             "hdl_c_nz.b"             "TG_nz.b"               
## [16] "HbA1c_nz.b"             "データ有り月数_total"   "データ有り月数_pre"    
## [19] "データ有り月数_post"    "月ごとの平均データ数"   "step_mean_pre"         
## [22] "step_mean_post"         "step_sd_pre"            "step_sd_post"          
## [25] "step_mean_pre.wd"       "step_mean_post.wd"      "step_sd_pre.wd"        
## [28] "step_sd_post.wd"        "step_mean_pre.we"       "step_mean_post.we"     
## [31] "step_sd_pre.we"         "step_sd_post.we"        "step.diff"             
## [34] "step.diff.wd"           "step.diff.we"           "weight_nz.f"           
## [37] "systolicBP_nz.f"        "diastolicBP_nz.f"       "ldl_c_nz.f"            
## [40] "hdl_c_nz.f"             "TG_nz.f"                "HbA1c_nz.f"            
## [43] "followup健診受診年月"   "weight_nz.diff"         "systolicBP_nz.diff"    
## [46] "diastolicBP_nz.diff"    "ldl_c_nz.diff"          "hdl_c_nz.diff"         
## [49] "TG_nz.diff"             "HbA1c_nz.diff"

欠測値の確認

is.na(user.data_8.detect) %>% colSums()

##                     ID                 gender                    age 
##                      0                      0                      0 
##         kencom登録年月           資格取得年月           資格喪失年月 
##                      0                      0                      0 
## レセプトデータ開始年月 レセプトデータ終了年月   baseline健診受診年月 
##                      0                      0                      0 
##            weight_nz.b        systolicBP_nz.b       diastolicBP_nz.b 
##                      4                      4                      4 
##             ldl_c_nz.b             hdl_c_nz.b                TG_nz.b 
##                     24                     23                     23 
##             HbA1c_nz.b   データ有り月数_total     データ有り月数_pre 
##                    175                      0                      0 
##    データ有り月数_post   月ごとの平均データ数          step_mean_pre 
##                      0                      0                      0 
##         step_mean_post            step_sd_pre           step_sd_post 
##                      0                      0                      0 
##       step_mean_pre.wd      step_mean_post.wd         step_sd_pre.wd 
##                      0                      0                      0 
##        step_sd_post.wd       step_mean_pre.we      step_mean_post.we 
##                      0                      0                      0 
##         step_sd_pre.we        step_sd_post.we              step.diff 
##                      0                      0                      0 
##           step.diff.wd           step.diff.we            weight_nz.f 
##                      0                      0                      2 
##        systolicBP_nz.f       diastolicBP_nz.f             ldl_c_nz.f 
##                      1                      1                      9 
##             hdl_c_nz.f                TG_nz.f             HbA1c_nz.f 
##                      8                      8                    104 
##   followup健診受診年月         weight_nz.diff     systolicBP_nz.diff 
##                      0                      6                      5 
##    diastolicBP_nz.diff          ldl_c_nz.diff          hdl_c_nz.diff 
##                      5                     26                     24 
##             TG_nz.diff          HbA1c_nz.diff 
##                     24                    203

gg_miss_var(user.data_8.detect,show_pct=TRUE)

ggplot(user.data_8.detect,aes(x=age,y=HbA1c_nz.b))+
  geom_miss_point()

ggplot(user.data_8.detect,aes(x=age,y=HbA1c_nz.f))+
  geom_miss_point()

ggplot(user.data_8.detect,aes(x=age,y=ldl_c_nz.f))+
  geom_miss_point()

データ型の確認(missForest関数はcharacter型を含むデータは不可能のため。factor型、numeric型、integer型は可能)

sapply(user.data_8.detect, class)

##                     ID                 gender                    age 
##            "character"            "character"              "integer" 
##         kencom登録年月           資格取得年月           資格喪失年月 
##            "character"            "character"            "character" 
## レセプトデータ開始年月 レセプトデータ終了年月   baseline健診受診年月 
##            "character"            "character"            "character" 
##            weight_nz.b        systolicBP_nz.b       diastolicBP_nz.b 
##              "numeric"              "numeric"              "numeric" 
##             ldl_c_nz.b             hdl_c_nz.b                TG_nz.b 
##              "numeric"              "numeric"              "numeric" 
##             HbA1c_nz.b   データ有り月数_total     データ有り月数_pre 
##              "numeric"              "integer"              "integer" 
##    データ有り月数_post   月ごとの平均データ数          step_mean_pre 
##              "integer"              "numeric"              "numeric" 
##         step_mean_post            step_sd_pre           step_sd_post 
##              "numeric"              "numeric"              "numeric" 
##       step_mean_pre.wd      step_mean_post.wd         step_sd_pre.wd 
##              "numeric"              "numeric"              "numeric" 
##        step_sd_post.wd       step_mean_pre.we      step_mean_post.we 
##              "numeric"              "numeric"              "numeric" 
##         step_sd_pre.we        step_sd_post.we              step.diff 
##              "numeric"              "numeric"              "numeric" 
##           step.diff.wd           step.diff.we            weight_nz.f 
##              "numeric"              "numeric"              "numeric" 
##        systolicBP_nz.f       diastolicBP_nz.f             ldl_c_nz.f 
##              "numeric"              "numeric"              "numeric" 
##             hdl_c_nz.f                TG_nz.f             HbA1c_nz.f 
##              "numeric"              "numeric"              "numeric" 
##   followup健診受診年月         weight_nz.diff     systolicBP_nz.diff 
##            "character"              "numeric"              "numeric" 
##    diastolicBP_nz.diff          ldl_c_nz.diff          hdl_c_nz.diff 
##              "numeric"              "numeric"              "numeric" 
##             TG_nz.diff          HbA1c_nz.diff 
##              "numeric"              "numeric"

#user.data_8.detect$ID<-as.factor(user.data_8.detect$ID) #IDは削除するので不要
user.data_8.detect$gender<-as.factor(user.data_8.detect$gender)
user.data_8.detect$age<-as.numeric(user.data_8.detect$age)
user.data_8.detect$データ有り月数_total<-as.numeric(user.data_8.detect$データ有り月数_total)
user.data_8.detect$データ有り月数_pre<-as.numeric(user.data_8.detect$データ有り月数_pre)
user.data_8.detect$データ有り月数_post<-as.numeric(user.data_8.detect$データ有り月数_post)

missForest対応していないデータは削除

user.data_8.detect_missF <-
  user.data_8.detect %>%
  ungroup() %>%  # グルーピングを解除
  select(-ID, -kencom登録年月, -baseline健診受診年月, -followup健診受診年月, 
         -資格取得年月, -資格喪失年月, -レセプトデータ開始年月, -レセプトデータ終了年月)

#IDは削除したくないため、取り出しておく。missForest関数の結果と後で統合するため。
id <- user.data_8.detect$ID

sapply(user.data_8.detect_missF, class)

##               gender                  age          weight_nz.b 
##             "factor"            "numeric"            "numeric" 
##      systolicBP_nz.b     diastolicBP_nz.b           ldl_c_nz.b 
##            "numeric"            "numeric"            "numeric" 
##           hdl_c_nz.b              TG_nz.b           HbA1c_nz.b 
##            "numeric"            "numeric"            "numeric" 
## データ有り月数_total   データ有り月数_pre  データ有り月数_post 
##            "numeric"            "numeric"            "numeric" 
## 月ごとの平均データ数        step_mean_pre       step_mean_post 
##            "numeric"            "numeric"            "numeric" 
##          step_sd_pre         step_sd_post     step_mean_pre.wd 
##            "numeric"            "numeric"            "numeric" 
##    step_mean_post.wd       step_sd_pre.wd      step_sd_post.wd 
##            "numeric"            "numeric"            "numeric" 
##     step_mean_pre.we    step_mean_post.we       step_sd_pre.we 
##            "numeric"            "numeric"            "numeric" 
##      step_sd_post.we            step.diff         step.diff.wd 
##            "numeric"            "numeric"            "numeric" 
##         step.diff.we          weight_nz.f      systolicBP_nz.f 
##            "numeric"            "numeric"            "numeric" 
##     diastolicBP_nz.f           ldl_c_nz.f           hdl_c_nz.f 
##            "numeric"            "numeric"            "numeric" 
##              TG_nz.f           HbA1c_nz.f       weight_nz.diff 
##            "numeric"            "numeric"            "numeric" 
##   systolicBP_nz.diff  diastolicBP_nz.diff        ldl_c_nz.diff 
##            "numeric"            "numeric"            "numeric" 
##        hdl_c_nz.diff           TG_nz.diff        HbA1c_nz.diff 
##            "numeric"            "numeric"            "numeric"

missForest実行

doParallelを使って並列処理をすると高速化するので、実際行う場合には並列化推奨。　　ただし、注意すべき点がいくつかあるので、本コード内では安全性を考慮して並列処理は行わない。並列化しなくても本データは1～2時間で補完できる（はず）。

imp_missForest <-
  missForest(xmis = as.data.frame(user.data_8.detect_missF),
             maxiter = 30,
             ntree = 100
             )

imp_missForest$OOBerror

##       NRMSE         PFC 
## 0.004831176 0.000000000

結果の確認

user.data_9_hokan <- imp_missForest$ximp
user.data_9_hokan <- cbind(ID = id, imp_missForest$ximp)　#IDを統合。IDは保持できている。
user.data_9_hokan <- 
  user.data_9_hokan %>% 
  inner_join(user.data_8.detect %>% select(ID,kencom登録年月, baseline健診受診年月, followup健診受診年月, 
                                         資格取得年月, 資格喪失年月, レセプトデータ開始年月, レセプトデータ終了年月), by = "ID")

user.data_9_hokan %>%
  write.csv(., "/Users/abekohei/Desktop/HDS演習＿DeSC/HDS_DeSC/user.data_9.欠測値補完済.csv", row.names = FALSE)

data3_hokan <- read.csv("/Users/abekohei/Desktop/HDS演習＿DeSC/HDS_DeSC/data3_exam.destructed_original_hokan.csv")

分析２-1

data3_hokan %>% head()

##     仮個人id 性別_sfl 年齢_sfl kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6     男性       66     2016-04-01                  1
## 2 1OwsKnMNaE     男性       39     2016-04-01                  3
## 3 0IliCoWCZV     男性       31     2016-04-01                  3
## 4 M0tEnZHNql     女性       51     2016-04-01                  2
## 5 PDnTXele4g     男性       26     2016-04-01                  5
## 6 AecvtKe8cE     男性       34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -961.92398     -4.1390000           1.204490
## 2                  61.99986     -3.1120477           5.615931
## 3                  61.99986      0.7284926          -2.445457
## 4                -327.63873      4.0614129          24.935567
## 5                1581.09765     -2.6122737          10.506455
## 6                -327.63873     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363

data3_hokan %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	5680
Number of columns	30
_______________________
Column type frequency:
character	5
numeric	25
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
仮個人id	1	10	10	5680
性別_sfl	1	2	2	2
kencom登録年月	1	10	10	24
baseline健診受診年月	1	10	10	32
followup健診受診年月	1	10	10	24

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
年齢_sfl	1	44.85	11.08	18.00	37.00	46.00	53.00	73.00	▂▅▇▆▁
step.diff.quintile	1	2.99	1.42	1.00	2.00	3.00	4.00	5.00	▇▇▇▇▇
step.diff.median.by.group	1	167.23	853.89	-961.92	-327.64	62.00	510.03	1581.10	▅▃▇▁▃
weight_nz.diff	1	0.37	4.09	-31.13	-2.05	0.44	2.92	19.59	▁▁▅▇▁
systolicBP_nz.diff	1	1.04	13.35	-59.82	-7.58	1.04	9.51	52.65	▁▁▇▃▁
diastolicBP_nz.diff	1	1.02	9.60	-37.00	-5.21	0.99	7.20	45.97	▁▃▇▂▁
ldl_c_nz.diff	1	1.47	22.48	-143.84	-10.55	1.73	14.31	139.08	▁▁▇▁▁
hdl_c_nz.diff	1	1.12	9.21	-43.70	-4.56	0.83	6.40	52.75	▁▂▇▁▁
TG_nz.diff	1	-0.01	72.32	-1279.42	-18.80	0.15	19.67	1146.49	▁▁▇▁▁
HbA1c_nz.diff	1	0.04	0.37	-4.51	-0.17	0.04	0.24	5.93	▁▁▇▁▁
step_mean.diff	1	209.38	1301.86	-6205.96	-442.92	58.09	660.11	12842.29	▁▇▁▁▁
weight_nz.b	1	64.08	12.36	32.24	54.99	63.62	71.77	139.68	▃▇▂▁▁
systolicBP_nz.b	1	117.88	15.13	75.98	107.22	117.10	127.38	209.34	▂▇▂▁▁
diastolicBP_nz.b	1	73.18	11.51	41.28	64.99	72.28	80.81	126.16	▂▇▆▁▁
ldl_c_nz.b	1	120.58	30.26	28.98	99.26	118.20	139.74	276.22	▁▇▅▁▁
hdl_c_nz.b	1	63.26	16.04	24.59	51.51	61.31	72.91	140.46	▂▇▃▁▁
TG_nz.b	1	102.81	82.10	20.62	57.46	81.49	119.74	1450.27	▇▁▁▁▁
HbA1c_nz.b	1	5.42	0.47	4.09	5.15	5.36	5.61	12.17	▇▂▁▁▁
weight_nz.f	1	64.45	12.35	31.64	55.43	64.10	71.80	141.43	▂▇▂▁▁
systolicBP_nz.f	1	118.92	15.38	77.58	108.04	118.40	128.88	204.01	▂▇▃▁▁
diastolicBP_nz.f	1	74.20	11.43	41.49	66.21	73.56	81.76	121.98	▁▇▇▂▁
ldl_c_nz.f	1	122.04	30.77	28.81	100.75	119.66	141.05	279.04	▁▇▅▁▁
hdl_c_nz.f	1	64.38	16.54	25.77	52.36	62.18	74.19	151.11	▃▇▃▁▁
TG_nz.f	1	102.80	81.76	19.96	57.61	81.43	122.58	1544.59	▇▁▁▁▁
HbA1c_nz.f	1	5.46	0.50	4.05	5.18	5.40	5.65	12.56	▇▂▁▁▁

colnames(data3_hokan)

##  [1] "仮個人id"                  "性別_sfl"                 
##  [3] "年齢_sfl"                  "kencom登録年月"           
##  [5] "step.diff.quintile"        "step.diff.median.by.group"
##  [7] "weight_nz.diff"            "systolicBP_nz.diff"       
##  [9] "diastolicBP_nz.diff"       "ldl_c_nz.diff"            
## [11] "hdl_c_nz.diff"             "TG_nz.diff"               
## [13] "HbA1c_nz.diff"             "step_mean.diff"           
## [15] "baseline健診受診年月"      "followup健診受診年月"     
## [17] "weight_nz.b"               "systolicBP_nz.b"          
## [19] "diastolicBP_nz.b"          "ldl_c_nz.b"               
## [21] "hdl_c_nz.b"                "TG_nz.b"                  
## [23] "HbA1c_nz.b"                "weight_nz.f"              
## [25] "systolicBP_nz.f"           "diastolicBP_nz.f"         
## [27] "ldl_c_nz.f"                "hdl_c_nz.f"               
## [29] "TG_nz.f"                   "HbA1c_nz.f"

data3_hokan <- data3_hokan %>% 
  rename(gender = 性別_sfl, age = 年齢_sfl) %>%
  mutate(gender = if_else(gender == "男性", "male", "female"))

#data3の確認

ggplot(data3_hokan,mapping = aes(x=step.diff.quintile,fill=gender))+
geom_bar()

hist(data3_hokan$age)

ggplot(data3_hokan, aes(x=age, fill=gender)) +
  geom_histogram(binwidth=1, position="identity",alpha=0.5) +
  scale_fill_brewer(palette="Set1") +
  labs(title="age", x="age", y="count") +
  theme_minimal()

hist(data3_hokan$step_mean.diff)

ggplot(data3_hokan,mapping = aes(x=weight_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=systolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=diastolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=ldl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=hdl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=TG_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=HbA1c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=weight_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=systolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=diastolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=ldl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=hdl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=TG_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3_hokan,mapping = aes(x=step_mean.diff,y=HbA1c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

data3_hokan_longer<- data3_hokan %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "value"
  )
ggplot(data3_hokan_longer, aes(x = step_mean.diff, y = value, color = gender)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~measure_type, scales = "free") +
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

data3_hokan %>% group_by(gender,"HbA1c_nz.diff") %>% 
  summarise(mean_Hb=mean(HbA1c_nz.diff)) %>% 
  ggplot(mapping = aes(x=gender,y=mean_Hb,fill=gender))+
  geom_bar(stat="identity",position="dodge")+
  xlab("Gender") +ylab("HbA1c_diff")+
  theme_bw()

## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.

t.test(HbA1c_nz.diff~gender,data=data3_hokan)

## 
##  Welch Two Sample t-test
## 
## data:  HbA1c_nz.diff by gender
## t = -0.50046, df = 5205.3, p-value = 0.6168
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
##  -0.02415687  0.01433144
## sample estimates:
## mean in group female   mean in group male 
##           0.03511848           0.04003119

#ヒストグラム作成

#step.diff.quintileごとの平均年齢
data3_hokan %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_age=mean(age,na.rm=T)) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_age))+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Mean Age")+
  theme_bw()

#step.diff.quintileごとにおけるベースライン時と健診時の体重差

#step.diff.quintileごとにおけるベースライン時と健診時の体重差
data3_hokan %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_weight=mean(weight_nz.diff,na.rm=T),
                             sd_weight=sd(weight_nz.diff,na.rm=T),
                             se_weight=sd_weight / sqrt(n())) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_weight))+
  geom_errorbar(aes(ymin=mean_weight-se_weight,ymax=mean_weight+se_weight),width=0.3)+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Changing Weight(Mean Weight)")+
  theme_bw()

data3_hokan %>% 
  group_by(step.diff.quintile, gender) %>% 
  summarise(mean_weight=mean(weight_nz.diff, na.rm=TRUE),
            sd_weight=sd(weight_nz.diff, na.rm=TRUE),
            se_weight=sd_weight / sqrt(n())) %>% 
  ggplot(mapping = aes(x=step.diff.quintile, y=mean_weight, fill=gender)) +
  geom_errorbar(aes(ymin=mean_weight-se_weight, ymax=mean_weight+se_weight), width=0.3,    
  position=position_dodge(width=0.5)) +
  geom_bar(stat="identity", position=position_dodge(width=0.5)) +
  xlab("Step Difference Quintile") +
  ylab("Changing Weight (Mean Weight)") +
  theme_bw() +
  scale_fill_manual(values = c("male" = "blue", "female" = "pink")) # ここで性別ごとの色を指定

## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data3_hokan %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()

## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

分析２-2

健診検査値変化量 = Β x step.diff.median.by.group+切片(intercept)

data3_hokan %>% head()

##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6   male  66     2016-04-01                  1
## 2 1OwsKnMNaE   male  39     2016-04-01                  3
## 3 0IliCoWCZV   male  31     2016-04-01                  3
## 4 M0tEnZHNql female  51     2016-04-01                  2
## 5 PDnTXele4g   male  26     2016-04-01                  5
## 6 AecvtKe8cE   male  34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -961.92398     -4.1390000           1.204490
## 2                  61.99986     -3.1120477           5.615931
## 3                  61.99986      0.7284926          -2.445457
## 4                -327.63873      4.0614129          24.935567
## 5                1581.09765     -2.6122737          10.506455
## 6                -327.63873     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363

step.diff.median.by.groupについて

unique(data3_hokan$step.diff.median.by.group)

## [1] -961.92398   61.99986 -327.63873 1581.09765  510.03305

n_distinct(data3_hokan$step.diff.median.by.group)

## [1] 5

summary(data3_hokan$step.diff.median.by.group)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -961.9  -327.6    62.0   167.2   510.0  1581.1

str(data3_hokan$step.diff.median.by.group)

##  num [1:5680] -962 62 62 -328 1581 ...

data4_hokan<-data3_hokan %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n()))

## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.

data4_hokan %>% head()

## # A tibble: 6 × 5
## # Groups:   step.diff.median.by.group [1]
##   step.diff.median.by.group measure_type          mean     sd     se
##                       <dbl> <chr>                <dbl>  <dbl>  <dbl>
## 1                     -962. HbA1c_nz.diff       0.0492  0.417 0.0123
## 2                     -962. TG_nz.diff          3.39   82.1   2.42  
## 3                     -962. diastolicBP_nz.diff 1.35    9.08  0.268 
## 4                     -962. hdl_c_nz.diff       0.156   9.32  0.275 
## 5                     -962. ldl_c_nz.diff       3.03   23.4   0.688 
## 6                     -962. systolicBP_nz.diff  1.69   12.7   0.375

#dBP
DBP_hokan<-
  data4_hokan %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)

#sBP
SBP_hokan<-
  data4_hokan %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)

#weight
weight_hokan<-
  data4_hokan %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)

#HbA1c
HbA1c_hokan<-
  data4_hokan %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)

#TG
TG_hokan<-
  data4_hokan %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)

#HDL
HDL_hokan<-
  data4_hokan %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)

#LDL
LDL_hokan<-
  data4_hokan %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)

data5_hokan<-bind_rows(DBP_hokan,SBP_hokan,weight_hokan,HbA1c_hokan,TG_hokan,HDL_hokan,LDL_hokan)

全体のsBP,weight,HbA1c,HDLについては有意差あり

data5_hokan %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*",""))

##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -2.851582e-04 0.065543559       
## step.diff.median.by.group...2    sBP_diff -4.436297e-04 0.007693330      *
## step.diff.median.by.group...3 weight_diff -3.547857e-04 0.003060491      *
## step.diff.median.by.group...4  HbA1c_diff -1.516801e-05 0.023672830      *
## step.diff.median.by.group...5     TG_diff -2.229034e-03 0.319833692       
## step.diff.median.by.group...6    HDL_diff  8.257078e-04 0.007198414      *
## step.diff.median.by.group...7    LDL_diff -6.019312e-04 0.249781215

HDS_DeSC_欠測値補完

阿部

2024-01-21