R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

pacman::p_load(tidyverse,readxl,ggplot2,dplyr,tableone,ggpubr,ggrepel,DBI,skimr,dbplyr,pryr,lubridate, survival, survminer,car,svglite,MatchIt, cobalt)

#データ読み込み

data1<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data1_step.destructed.csv")
data2<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data2_monthly_step.destructed.csv")
data3<-read.csv("/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/processed_data/data3_exam.destructed.csv")

分析1-1

data1 %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	12602
Number of columns	19
_______________________
Column type frequency:
character	4
numeric	15
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
仮個人id	1	10	10	12602
性別_sfl	1	2	2	2
kencom登録年月	1	10	10	27
baseline健診受診年月	1	10	10	38

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
年齢_sfl	0	1.00	44.72	11.04	18.00	37.00	46.00	53.00	73.00	▂▅▇▆▁
step_mean.diff	0	1.00	509.85	1594.22	-6205.96	-307.56	200.83	979.56	24369.72	▅▇▁▁▁
step_mean.b	0	1.00	5641.75	2685.98	25.30	3386.36	5544.94	7509.58	20954.08	▆▇▂▁▁
step_mean.f	0	1.00	6151.60	2723.39	12.77	4085.90	6017.96	7872.06	30572.20	▇▇▁▁▁
step_sd.b	0	1.00	3089.95	1172.33	46.16	2245.54	2999.51	3770.87	7677.58	▂▇▆▂▁
step_sd.f	0	1.00	3420.28	1412.43	47.54	2547.88	3219.49	4026.36	36661.56	▇▁▁▁▁
step_mean.diff.wd	0	1.00	533.68	1684.35	-7282.11	-305.09	220.83	1038.35	23533.30	▁▇▁▁▁
step_mean.diff.we	0	1.00	462.63	1748.29	-9706.07	-439.25	198.55	1016.13	26459.46	▁▇▁▁▁
weight_nz.b	4	1.00	62.87	12.65	31.87	53.18	62.05	70.81	139.68	▃▇▂▁▁
systolicBP_nz.b	5	1.00	118.19	15.92	75.16	106.95	117.17	127.86	209.34	▂▇▃▁▁
diastolicBP_nz.b	5	1.00	73.24	11.82	38.80	64.90	72.40	80.78	128.57	▁▇▆▁▁
ldl_c_nz.b	35	1.00	121.50	31.45	28.53	99.48	119.03	141.13	283.21	▁▇▅▁▁
hdl_c_nz.b	33	1.00	64.82	16.41	24.59	52.76	63.10	74.91	152.03	▂▇▃▁▁
TG_nz.b	33	1.00	100.59	78.44	20.16	56.83	80.02	118.76	1588.09	▇▁▁▁▁
HbA1c_nz.b	330	0.97	5.44	0.54	4.02	5.15	5.37	5.62	13.12	▇▁▁▁▁

集団の平均値を算出

# 平均と標準偏差を計算
mean_data <- mean(data1$step_mean.diff)
sd_data <- sd(data1$step_mean.diff)
se_data <- sd_data / sqrt(length(data1$step_mean.diff)) 
mean_data

## [1] 509.8529

sd_data

## [1] 1594.224

se_data

## [1] 14.20135

ヒストグラムを作成

#年齢
ggplot(data1,mapping = aes(x=年齢_sfl))+
  geom_histogram(bins = 50)+
  theme_bw()

#体重
ggplot(data1,mapping = aes(x=weight_nz.b))+
  geom_histogram(bins = 100)+
  theme_bw()

## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).

#歩数変化量
ggplot(data1,mapping = aes(x=step_mean.diff))+
  geom_histogram(bins = 100)+
  #xlim(-5000,5000)+
  theme_bw()

# Kolmogorov Smirnovテストの実行
#ks.test(data1, "pnorm", mean = mean_data, sd = sd_data)

正規分布ではないのでウィルコクソン符号順位検定の実行

#wilcox.test(data1$step_mean.diff, mu = 0, paired = FALSE)

t検定実施。結果有意差あり→有意に増加している

data1$step_mean.diff %>% t.test()

## 
##  One Sample t-test
## 
## data:  .
## t = 35.902, df = 12601, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  482.0161 537.6897
## sample estimates:
## mean of x 
##  509.8529

分析1-2

data2 %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	302448
Number of columns	10
_______________________
Column type frequency:
character	4
numeric	6
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
仮個人id	1	10	10	12602
性別_sfl	1	2	2	2
kencom登録年月	1	10	10	27
年月	1	10	10	50

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
年齢_sfl	0	1	44.72	11.04	18	37.00	46.00	53.00	73.00	▂▅▇▆▁
登録からの月数	0	1	-0.50	6.92	-12	-6.25	-0.50	5.25	11.00	▇▇▆▇▇
step_mean	0	1	5893.71	2976.58	2	3573.42	5695.32	7752.38	84714.43	▇▁▁▁▁
step_sd	0	1	2955.37	1477.54	0	1978.24	2771.46	3671.65	47763.00	▇▁▁▁▁
step_mean.wd	51	1	6210.87	3225.61	2	3631.09	6026.95	8276.27	84697.91	▇▁▁▁▁
step_mean.we	275	1	5085.80	3278.90	2	2735.50	4435.88	6703.11	93572.88	▇▁▁▁▁

#ヒストグラム作成

ggplot(data2,mapping = aes(x=step_mean))+
  geom_histogram(bins = 100)+
  theme_bw()

## 登録月と歩数の関係

se_data2 <- sd(data2$step_mean) / sqrt(length(data2$step_mean))

data2 %>% group_by(登録からの月数) %>% 
  summarise(mean_step=mean(step_mean),
            sd_step=sd(step_mean),
            se_step=sd_step / sqrt(n())) %>%
  ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
  geom_vline(xintercept = 0,linetype="dotted")+
  xlab("registration_month")+ylab("mean_step")+
  theme_bw()

#週末と平日における歩数の違い(wd=平日、we=週末)

#平日
data2 %>% filter(!is.na(step_mean.wd)) %>% 
  group_by(登録からの月数) %>% 
  summarise(mean_step=mean(step_mean.wd),
            sd_step=sd(step_mean.wd),
            se_step=sd_step / sqrt(n())) %>%
  ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
  geom_vline(xintercept = 0,linetype="dotted")+
  xlab("registration_month")+ylab("mean_step")+
  theme_bw()

#週末
data2 %>% filter(!is.na(step_mean.we)) %>% 
  group_by(登録からの月数) %>% 
  summarise(mean_step=mean(step_mean.we),
            sd_step=sd(step_mean.we),
            se_step=sd_step / sqrt(n())) %>%
  ggplot(mapping = aes(x=登録からの月数,y=mean_step))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=mean_step-se_step,ymax=mean_step+se_step),width=0.1)+
  geom_vline(xintercept = 0,linetype="dotted")+
  xlab("registration_month")+ylab("mean_step")+
  theme_bw()

#平日と週末の歩数について

data2_we <- data2 %>%
  pivot_longer(
    cols = c(step_mean.wd, step_mean.we),
    names_to = "day_type",
    values_to = "mean_step"
  ) %>%
  mutate(day_type = if_else(day_type == "step_mean.we", "Weekend", "Weekday")) %>%
　distinct(仮個人id,登録からの月数,day_type,.keep_all = T) %>%
  group_by(登録からの月数, day_type) %>%
  summarise(
    mean_step1 = mean(mean_step, na.rm = TRUE),
    sd_step = sd(mean_step, na.rm = TRUE),
    se_step = sd_step / sqrt(n()))

## `summarise()` has grouped output by '登録からの月数'. You can override using
## the `.groups` argument.

data2_we

## # A tibble: 48 × 5
## # Groups:   登録からの月数 [24]
##    登録からの月数 day_type mean_step1 sd_step se_step
##             <int> <chr>         <dbl>   <dbl>   <dbl>
##  1            -12 Weekday       5992.   3191.    28.4
##  2            -12 Weekend       4953.   3090.    27.5
##  3            -11 Weekday       5930.   3065.    27.3
##  4            -11 Weekend       4848.   3062.    27.3
##  5            -10 Weekday       5938.   3106.    27.7
##  6            -10 Weekend       4822.   3013.    26.8
##  7             -9 Weekday       5925.   3118.    27.8
##  8             -9 Weekend       4823.   3010.    26.8
##  9             -8 Weekday       5901.   3102.    27.6
## 10             -8 Weekend       4823.   3081.    27.4
## # ℹ 38 more rows

平日と週末における歩数の違い

  ggplot(data2_we, aes(x = 登録からの月数, y = mean_step1, color = day_type)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = mean_step1 - se_step, ymax = mean_step1 + se_step)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  xlab("registration_month") + ylab("mean_step") +
  theme_bw() +
  scale_color_manual(values = c("Weekend" = "blue", "Weekday" = "red"))

分析２-1

data3 %>% head()

##     仮個人id 性別_sfl 年齢_sfl kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6     男性       66     2016-04-01                  1
## 2 1OwsKnMNaE     男性       39     2016-04-01                  3
## 3 0IliCoWCZV     男性       31     2016-04-01                  3
## 4 M0tEnZHNql     女性       51     2016-04-01                  2
## 5 PDnTXele4g     男性       26     2016-04-01                  5
## 6 AecvtKe8cE     男性       34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -963.51562     -4.1390000           1.204490
## 2                  62.69344     -3.1120477           5.615931
## 3                  62.69344      0.7284926          -2.445457
## 4                -327.36152      4.0614129          24.935567
## 5                1596.78921     -2.6122737          10.506455
## 6                -327.36152     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363

data3 <- data3 %>% 
  rename(gender = 性別_sfl, age = 年齢_sfl) %>%
  mutate(gender = if_else(gender == "男性", "male", "female"))

data3 %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	5473
Number of columns	30
_______________________
Column type frequency:
character	5
numeric	25
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
仮個人id	1	10	10	5473
gender	1	4	6	2
kencom登録年月	1	10	10	24
baseline健診受診年月	1	10	10	32
followup健診受診年月	1	10	10	24

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
age	1	44.86	11.10	18.00	37.00	46.00	53.00	73.00	▂▅▇▆▁
step.diff.quintile	1	3.00	1.41	1.00	2.00	3.00	4.00	5.00	▇▇▇▇▇
step.diff.median.by.group	1	177.03	860.34	-963.52	-327.36	62.69	516.39	1596.79	▅▃▇▁▅
weight_nz.diff	1	0.36	4.11	-31.13	-2.06	0.44	2.92	19.59	▁▁▅▇▁
systolicBP_nz.diff	1	1.09	13.39	-59.82	-7.55	1.11	9.60	52.65	▁▂▇▃▁
diastolicBP_nz.diff	1	1.01	9.59	-37.00	-5.21	0.96	7.19	45.97	▁▃▇▂▁
ldl_c_nz.diff	1	1.44	22.55	-143.84	-10.60	1.72	14.37	108.90	▁▁▇▅▁
hdl_c_nz.diff	1	1.13	9.27	-43.70	-4.57	0.84	6.42	52.75	▁▂▇▁▁
TG_nz.diff	1	-0.25	73.23	-1279.42	-19.13	0.07	19.70	1146.49	▁▁▇▁▁
HbA1c_nz.diff	1	0.04	0.37	-4.51	-0.18	0.04	0.25	5.93	▁▁▇▁▁
step_mean.diff	1	220.59	1308.52	-6205.96	-437.21	62.69	668.41	12842.29	▁▇▁▁▁
weight_nz.b	1	64.09	12.38	32.24	54.99	63.60	71.81	139.68	▃▇▂▁▁
systolicBP_nz.b	1	117.95	15.14	75.98	107.29	117.14	127.39	209.34	▂▇▂▁▁
diastolicBP_nz.b	1	73.31	11.45	41.28	65.08	72.42	80.89	126.16	▁▇▆▁▁
ldl_c_nz.b	1	120.87	30.34	28.98	99.45	118.56	140.01	276.22	▁▇▅▁▁
hdl_c_nz.b	1	63.33	16.13	24.59	51.51	61.31	73.07	140.46	▂▇▃▁▁
TG_nz.b	1	103.59	82.98	20.62	57.83	82.07	120.17	1450.27	▇▁▁▁▁
HbA1c_nz.b	1	5.43	0.47	4.09	5.15	5.37	5.62	12.17	▇▂▁▁▁
weight_nz.f	1	64.45	12.36	31.64	55.44	64.08	71.84	141.43	▂▇▂▁▁
systolicBP_nz.f	1	119.05	15.35	77.58	108.21	118.54	129.03	204.01	▂▇▃▁▁
diastolicBP_nz.f	1	74.32	11.39	41.49	66.35	73.70	81.85	119.35	▁▇▇▂▁
ldl_c_nz.f	1	122.32	30.75	28.81	101.02	119.97	141.47	279.04	▁▇▅▁▁
hdl_c_nz.f	1	64.46	16.63	25.77	52.39	62.23	74.44	151.11	▃▇▃▁▁
TG_nz.f	1	103.34	82.45	19.96	57.97	81.88	123.17	1544.59	▇▁▁▁▁
HbA1c_nz.f	1	5.46	0.49	4.05	5.18	5.41	5.65	12.56	▇▂▁▁▁

colnames(data3)

##  [1] "仮個人id"                  "gender"                   
##  [3] "age"                       "kencom登録年月"           
##  [5] "step.diff.quintile"        "step.diff.median.by.group"
##  [7] "weight_nz.diff"            "systolicBP_nz.diff"       
##  [9] "diastolicBP_nz.diff"       "ldl_c_nz.diff"            
## [11] "hdl_c_nz.diff"             "TG_nz.diff"               
## [13] "HbA1c_nz.diff"             "step_mean.diff"           
## [15] "baseline健診受診年月"      "followup健診受診年月"     
## [17] "weight_nz.b"               "systolicBP_nz.b"          
## [19] "diastolicBP_nz.b"          "ldl_c_nz.b"               
## [21] "hdl_c_nz.b"                "TG_nz.b"                  
## [23] "HbA1c_nz.b"                "weight_nz.f"              
## [25] "systolicBP_nz.f"           "diastolicBP_nz.f"         
## [27] "ldl_c_nz.f"                "hdl_c_nz.f"               
## [29] "TG_nz.f"                   "HbA1c_nz.f"

#data3の確認

ggplot(data3,mapping = aes(x=step.diff.quintile,fill=gender))+
geom_bar()

hist(data3$age)

ggplot(data3, aes(x=age, fill=gender)) +
  geom_histogram(binwidth=1, position="identity",alpha=0.5) +
  scale_fill_brewer(palette="Set1") +
  labs(title="age", x="age", y="count") +
  theme_minimal()

hist(data3$step_mean.diff)

ggplot(data3,mapping = aes(x=weight_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=systolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=diastolicBP_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=ldl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=hdl_c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=TG_nz.diff,fill=gender))+
geom_histogram(bins = 100)

ggplot(data3,mapping = aes(x=HbA1c_nz.diff,fill=gender))+
geom_histogram(bins = 100)

step_mean.diff=(step_mean.f - step_mean.b) HbA1c_nz.diff=(HbA1c_nz.f - HbA1c_nz.b)

ggplot(data3,mapping = aes(x=step_mean.diff,y=weight_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=systolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=diastolicBP_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=ldl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=hdl_c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=TG_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data3,mapping = aes(x=step_mean.diff,y=HbA1c_nz.diff,color=gender))+
  geom_point()+
  geom_smooth(method = "lm",se=F)+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

data3_longer<- data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "value"
  )
ggplot(data3_longer, aes(x = step_mean.diff, y = value, color = gender)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~measure_type, scales = "free") +
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

data3 %>% group_by(gender,"HbA1c_nz.diff") %>% 
  summarise(mean_Hb=mean(HbA1c_nz.diff)) %>% 
  ggplot(mapping = aes(x=gender,y=mean_Hb,fill=gender))+
  geom_bar(stat="identity",position="dodge")+
  xlab("Gender") +ylab("HbA1c_diff")+
  theme_bw()

## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.

t.test(HbA1c_nz.diff~gender,data=data3)

## 
##  Welch Two Sample t-test
## 
## data:  HbA1c_nz.diff by gender
## t = -0.55778, df = 5003.3, p-value = 0.577
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
##  -0.02555737  0.01423556
## sample estimates:
## mean in group female   mean in group male 
##           0.03397754           0.03963844

#ヒストグラム作成

#step.diff.quintileごとの平均年齢
data3 %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_age=mean(age,na.rm=T)) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_age))+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Mean Age")+
  theme_bw()

#step.diff.quintileごとにおけるベースライン時と健診時の体重差

#step.diff.quintileごとにおけるベースライン時と健診時の体重差
data3 %>% 
  group_by(step.diff.quintile) %>% 
  summarise(mean_weight=mean(weight_nz.diff,na.rm=T),
                             sd_weight=sd(weight_nz.diff,na.rm=T),
                             se_weight=sd_weight / sqrt(n())) %>%
  ggplot(mapping = aes(x=step.diff.quintile,y=mean_weight))+
  geom_errorbar(aes(ymin=mean_weight-se_weight,ymax=mean_weight+se_weight),width=0.3)+
  geom_bar(stat="identity")+
  xlab("Step Difference Quintile") +
  ylab("Changing Weight(Mean Weight)")+
  theme_bw()

data3 %>% 
  group_by(step.diff.quintile, gender) %>% 
  summarise(mean_weight=mean(weight_nz.diff, na.rm=TRUE),
            sd_weight=sd(weight_nz.diff, na.rm=TRUE),
            se_weight=sd_weight / sqrt(n())) %>% 
  ggplot(mapping = aes(x=step.diff.quintile, y=mean_weight, fill=gender)) +
  geom_errorbar(aes(ymin=mean_weight-se_weight, ymax=mean_weight+se_weight), width=0.3,    
  position=position_dodge(width=0.5)) +
  geom_bar(stat="identity", position=position_dodge(width=0.5)) +
  xlab("Step Difference Quintile") +
  ylab("Changing Weight (Mean Weight)") +
  theme_bw() +
  scale_fill_manual(values = c("male" = "blue", "female" = "pink")) # ここで性別ごとの色を指定

## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

歩数変化量と健診検査値変化量のグラフ

colnames(data3)

##  [1] "仮個人id"                  "gender"                   
##  [3] "age"                       "kencom登録年月"           
##  [5] "step.diff.quintile"        "step.diff.median.by.group"
##  [7] "weight_nz.diff"            "systolicBP_nz.diff"       
##  [9] "diastolicBP_nz.diff"       "ldl_c_nz.diff"            
## [11] "hdl_c_nz.diff"             "TG_nz.diff"               
## [13] "HbA1c_nz.diff"             "step_mean.diff"           
## [15] "baseline健診受診年月"      "followup健診受診年月"     
## [17] "weight_nz.b"               "systolicBP_nz.b"          
## [19] "diastolicBP_nz.b"          "ldl_c_nz.b"               
## [21] "hdl_c_nz.b"                "TG_nz.b"                  
## [23] "HbA1c_nz.b"                "weight_nz.f"              
## [25] "systolicBP_nz.f"           "diastolicBP_nz.f"         
## [27] "ldl_c_nz.f"                "hdl_c_nz.f"               
## [29] "TG_nz.f"                   "HbA1c_nz.f"

data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()

## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type, gender) %>%
  summarise(mean = mean(diff_value, na.rm = TRUE),
            sd = sd(diff_value, na.rm = TRUE),
            se = sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y = mean, fill = gender)) +
  geom_bar(stat = "identity", position = position_dodge()) +  
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
  facet_wrap(~measure_type, scales = "free") +
  scale_fill_manual(values = c("male" = "blue", "female" = "red")) +  
  theme_classic()

## `summarise()` has grouped output by 'step.diff.quintile', 'measure_type'. You
## can override using the `.groups` argument.

分析２-2

健診検査値変化量 = Β x step.diff.median.by.group+切片(intercept)

data3 %>% head()

##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6   male  66     2016-04-01                  1
## 2 1OwsKnMNaE   male  39     2016-04-01                  3
## 3 0IliCoWCZV   male  31     2016-04-01                  3
## 4 M0tEnZHNql female  51     2016-04-01                  2
## 5 PDnTXele4g   male  26     2016-04-01                  5
## 6 AecvtKe8cE   male  34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -963.51562     -4.1390000           1.204490
## 2                  62.69344     -3.1120477           5.615931
## 3                  62.69344      0.7284926          -2.445457
## 4                -327.36152      4.0614129          24.935567
## 5                1596.78921     -2.6122737          10.506455
## 6                -327.36152     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f
## 1   5.143625
## 2   6.222963
## 3   5.269366
## 4   5.593088
## 5   5.208233
## 6   5.129363

##step.diff.median.by.groupについて

unique(data3$step.diff.median.by.group)

## [1] -963.51562   62.69344 -327.36152 1596.78921  516.39142

n_distinct(data3$step.diff.median.by.group)

## [1] 5

summary(data3$step.diff.median.by.group)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -963.52 -327.36   62.69  177.03  516.39 1596.79

str(data3$step.diff.median.by.group)

##  num [1:5473] -963.5 62.7 62.7 -327.4 1596.8 ...

data4<-data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n()))

## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.

data4 %>% head()

## # A tibble: 6 × 5
## # Groups:   step.diff.median.by.group [1]
##   step.diff.median.by.group measure_type          mean     sd     se
##                       <dbl> <chr>                <dbl>  <dbl>  <dbl>
## 1                     -964. HbA1c_nz.diff       0.0488  0.426 0.0129
## 2                     -964. TG_nz.diff          3.25   83.9   2.54  
## 3                     -964. diastolicBP_nz.diff 1.39    9.12  0.276 
## 4                     -964. hdl_c_nz.diff       0.196   9.42  0.285 
## 5                     -964. ldl_c_nz.diff       3.10   23.6   0.712 
## 6                     -964. systolicBP_nz.diff  1.78   12.9   0.389

#dBP
DBP<-
  data4 %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)

#sBP
SBP<-
  data4 %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)

#weight
weight<-
  data4 %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)

#HbA1c
HbA1c<-
  data4 %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)

#TG
TG<-
  data4 %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)

#HDL
HDL<-
  data4 %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)

#LDL
LDL<-
  data4 %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)

data5<-bind_rows(DBP,SBP,weight,HbA1c,TG,HDL,LDL)
data5

##                                      term      Estimate     p_value
## step.diff.median.by.group...1    dBP_diff -2.890070e-04 0.106801745
## step.diff.median.by.group...2    sBP_diff -4.237653e-04 0.022124846
## step.diff.median.by.group...3 weight_diff -3.551869e-04 0.003169958
## step.diff.median.by.group...4  HbA1c_diff -1.468281e-05 0.020568124
## step.diff.median.by.group...5     TG_diff -2.401945e-03 0.279411725
## step.diff.median.by.group...6    HDL_diff  7.970896e-04 0.007896832
## step.diff.median.by.group...7    LDL_diff -5.979974e-04 0.288273409

全体のsBP,weight,HbA1c,HDLについては有意差あり

data5 %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*",""))

##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -2.890070e-04 0.106801745       
## step.diff.median.by.group...2    sBP_diff -4.237653e-04 0.022124846      *
## step.diff.median.by.group...3 weight_diff -3.551869e-04 0.003169958      *
## step.diff.median.by.group...4  HbA1c_diff -1.468281e-05 0.020568124      *
## step.diff.median.by.group...5     TG_diff -2.401945e-03 0.279411725       
## step.diff.median.by.group...6    HDL_diff  7.970896e-04 0.007896832      *
## step.diff.median.by.group...7    LDL_diff -5.979974e-04 0.288273409

#step.diff.median.by.groupを５つに分類して各検査値の変化量を比較

data6 <- data3 %>% 
  mutate(
   step.diff.median.group = round(step.diff.median.by.group, 3),
   step.diff.category = case_when(
   step.diff.median.group == round(-963.51562, 3) ~ 1,
   step.diff.median.group == round(-327.36152, 3) ~ 2,
   step.diff.median.group == round(62.69344, 3) ~ 3,
   step.diff.median.group == round(516.39142, 3) ~ 4,
   step.diff.median.group == round(1596.78921, 3) ~ 5,
   TRUE ~ NA 　　　　　　　　　　　　　　　　　　　　# どの条件にも当てはまらない場合、NAを割り当てる
    )
  )
data6 %>% head()

##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0yuPvFowq6   male  66     2016-04-01                  1
## 2 1OwsKnMNaE   male  39     2016-04-01                  3
## 3 0IliCoWCZV   male  31     2016-04-01                  3
## 4 M0tEnZHNql female  51     2016-04-01                  2
## 5 PDnTXele4g   male  26     2016-04-01                  5
## 6 AecvtKe8cE   male  34     2016-05-01                  2
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                -963.51562     -4.1390000           1.204490
## 2                  62.69344     -3.1120477           5.615931
## 3                  62.69344      0.7284926          -2.445457
## 4                -327.36152      4.0614129          24.935567
## 5                1596.78921     -2.6122737          10.506455
## 6                -327.36152     -0.3897352          -4.238583
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           2.6720062     5.3590352     0.4610505 -15.515152   -0.48755945
## 2          -3.6108605    -0.3891425     1.9976718  -2.879915   -0.21745953
## 3          -2.5427406   -22.6019218     4.9481587  -2.196534   -0.06833491
## 4           1.9345428   -15.3831264    -2.2091562 -19.505292   -0.16728154
## 5           8.2586546   -18.5112023   -18.5583183 104.989170   -0.08458658
## 6          -0.4893504   -10.5642779   -11.3340042  11.296307   -0.02670434
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1     -623.47945           2015-04-01           2017-04-01    66.89074
## 2      -12.98046           2015-06-01           2017-04-01    90.51351
## 3      143.71995           2015-05-01           2017-05-01    71.95757
## 4     -301.81007           2015-05-01           2017-05-01    67.16708
## 5     1174.52970           2015-05-01           2017-05-01    70.95344
## 6     -576.22646           2015-06-01           2017-05-01    65.04586
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1        109.9507         71.62864   108.8772   44.50094  76.35546   5.631184
## 2        119.0751         87.45228   117.3376   45.69706 108.15197   6.440423
## 3        133.8459         78.75107   118.8808   58.35234  67.63990   5.337701
## 4        112.6972         80.13369   124.2131   54.28470 132.65872   5.760370
## 5        101.7182         56.37320   155.5491   63.88320  72.98456   5.292819
## 6        111.6170         68.20776   136.7508   71.41820  78.06746   5.156067
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    62.75174        111.1552         74.30065  114.23626   44.96199  60.84030
## 2    87.40146        124.6911         83.84142  116.94841   47.69473 105.27205
## 3    72.68606        131.4004         76.20833   96.27893   63.30050  65.44337
## 4    71.22849        137.6327         82.06824  108.82998   52.07554 113.15343
## 5    68.34116        112.2247         64.63186  137.03787   45.32488 177.97374
## 6    64.65613        107.3784         67.71841  126.18648   60.08419  89.36376
##   HbA1c_nz.f step.diff.median.group step.diff.category
## 1   5.143625               -963.516                  1
## 2   6.222963                 62.693                  3
## 3   5.269366                 62.693                  3
## 4   5.593088               -327.362                  2
## 5   5.208233               1596.789                  5
## 6   5.129363               -327.362                  2

data6 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.category, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.category, y =mean)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()

## `summarise()` has grouped output by 'step.diff.category'. You can override
## using the `.groups` argument.

data6 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.category, measure_type, gender) %>%  # 追加: gender
  summarise(mean = mean(diff_value, na.rm = TRUE),
            sd = sd(diff_value, na.rm = TRUE),
            se = sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.category, y = mean, fill = gender)) + 
  geom_bar(stat = "identity", position = position_dodge()) +  
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
  facet_wrap(~measure_type, scales = "free") +
  scale_fill_manual(values = c("male" = "blue", "female" = "red")) +  
  theme_classic()

## `summarise()` has grouped output by 'step.diff.category', 'measure_type'. You
## can override using the `.groups` argument.

#write_csv(data6, "/Users/abekohei/Downloads/2_1日目講義前配布用/script_for_provision/data6.csv")

分析追加

性差での違いを調べる

data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()

## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data3 %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type, gender) %>%
  summarise(mean = mean(diff_value, na.rm = TRUE),
            sd = sd(diff_value, na.rm = TRUE),
            se = sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y = mean, fill = gender)) +
  geom_bar(stat = "identity", position = position_dodge()) +  
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, position = position_dodge(0.9)) +
  facet_wrap(~measure_type, scales = "free") +
  scale_fill_manual(values = c("male" = "blue", "female" = "red")) +  
  theme_classic()

## `summarise()` has grouped output by 'step.diff.quintile', 'measure_type'. You
## can override using the `.groups` argument.

男性データのみを抽出

data_male<-data3 %>% filter(gender=="male")

df_male1<-data_male %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n()))

## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.

#dBP
DBP_male<-
  df_male1 %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)

#sBP
SBP_male<-
  df_male1 %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)

#weight
weight_male<-
  df_male1 %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)

#HbA1c
HbA1c_male<-
  df_male1 %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)

#TG
TG_male<-
  df_male1 %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)

#HDL
HDL_male<-
  df_male1 %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)

#LDL
LDL_male<-
  df_male1 %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)

df5_male<-bind_rows(DBP_male,SBP_male,weight_male,HbA1c_male,TG_male,HDL_male,LDL_male)
df5_male

##                                      term      Estimate     p_value
## step.diff.median.by.group...1    dBP_diff -7.193161e-04 0.034426241
## step.diff.median.by.group...2    sBP_diff -6.272823e-04 0.116603247
## step.diff.median.by.group...3 weight_diff -3.520797e-04 0.019218203
## step.diff.median.by.group...4  HbA1c_diff -1.805442e-05 0.114183801
## step.diff.median.by.group...5     TG_diff -2.843390e-03 0.192499844
## step.diff.median.by.group...6    HDL_diff  8.258661e-04 0.008869557
## step.diff.median.by.group...7    LDL_diff -2.149112e-04 0.719694014

男性データではdBP_diff、weight_diff、HDL_diffについて有意差あり

df5_male %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*",""))

##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -7.193161e-04 0.034426241      *
## step.diff.median.by.group...2    sBP_diff -6.272823e-04 0.116603247       
## step.diff.median.by.group...3 weight_diff -3.520797e-04 0.019218203      *
## step.diff.median.by.group...4  HbA1c_diff -1.805442e-05 0.114183801       
## step.diff.median.by.group...5     TG_diff -2.843390e-03 0.192499844       
## step.diff.median.by.group...6    HDL_diff  8.258661e-04 0.008869557      *
## step.diff.median.by.group...7    LDL_diff -2.149112e-04 0.719694014

女性データのみを抽出

data_female<-data3 %>% filter(gender=="female")

df_female1<-data_female %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n()))

## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.

#dBP
DBP_female<-
  df_female1 %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="dBP_diff",.before=Estimate)

#sBP
SBP_female<-
  df_female1 %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)

#weight
weight_female<-
  df_female1 %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)

#HbA1c
HbA1c_female<-
  df_female1 %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)

#TG
TG_female<-
  df_female1 %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)

#HDL
HDL_female<-
  df_female1 %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)

#LDL
LDL_female<-
  df_female1 %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)

df5_female<-bind_rows(DBP_female,SBP_female,weight_female,HbA1c_female,TG_female,HDL_female,LDL_female)
df5_female

##                                      term      Estimate    p_value
## step.diff.median.by.group...1    dBP_diff  3.640916e-04 0.03953484
## step.diff.median.by.group...2    sBP_diff -1.192206e-04 0.77770724
## step.diff.median.by.group...3 weight_diff -3.569323e-04 0.01954224
## step.diff.median.by.group...4  HbA1c_diff -9.891568e-06 0.18287912
## step.diff.median.by.group...5     TG_diff -1.700495e-03 0.52461853
## step.diff.median.by.group...6    HDL_diff  7.601839e-04 0.01828471
## step.diff.median.by.group...7    LDL_diff -1.163230e-03 0.09320509

女性データではdBP_diff、weight_diff、HDL_diffについて有意差あり

df5_female %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*",""))

##                                      term      Estimate    p_value p<0.05
## step.diff.median.by.group...1    dBP_diff  3.640916e-04 0.03953484      *
## step.diff.median.by.group...2    sBP_diff -1.192206e-04 0.77770724       
## step.diff.median.by.group...3 weight_diff -3.569323e-04 0.01954224      *
## step.diff.median.by.group...4  HbA1c_diff -9.891568e-06 0.18287912       
## step.diff.median.by.group...5     TG_diff -1.700495e-03 0.52461853       
## step.diff.median.by.group...6    HDL_diff  7.601839e-04 0.01828471      *
## step.diff.median.by.group...7    LDL_diff -1.163230e-03 0.09320509

心血管リスク因子(HT,DL,DM)一つでも該当する者を対象とする

HT＝sBP130以上orDL＝LDL:140以上orHDL:40未満orTG:175以上orDM＝6.5

data3_highrisk<-data3 %>% 
  filter(systolicBP_nz.b>=130 | ldl_c_nz.b>=140 | hdl_c_nz.b<40 | TG_nz.b>=175 | HbA1c_nz.b>=6.5)

data3_highrisk %>% nrow()

## [1] 2449

data3_highrisk %>% skim()

Data summary
Name	Piped data
Number of rows	2449
Number of columns	30
_______________________
Column type frequency:
character	5
numeric	25
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
仮個人id	1	10	10	2449
gender	1	4	6	2
kencom登録年月	1	10	10	24
baseline健診受診年月	1	10	10	30
followup健診受診年月	1	10	10	23

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
age	1	44.97	11.06	18.00	37.00	46.00	53.00	71.00	▂▆▇▇▂
step.diff.quintile	1	3.01	1.42	1.00	2.00	3.00	4.00	5.00	▇▇▇▇▇
step.diff.median.by.group	1	182.03	862.23	-963.52	-327.36	62.69	516.39	1596.79	▅▅▇▁▅
weight_nz.diff	1	0.08	4.39	-31.13	-2.38	0.20	2.76	19.59	▁▁▅▇▁
systolicBP_nz.diff	1	-1.36	14.36	-59.82	-10.51	-1.26	8.19	52.65	▁▂▇▃▁
diastolicBP_nz.diff	1	0.28	10.04	-37.00	-6.16	0.51	6.96	45.97	▁▅▇▂▁
ldl_c_nz.diff	1	-2.53	26.60	-143.84	-16.50	-1.30	13.35	98.39	▁▁▇▇▁
hdl_c_nz.diff	1	1.18	8.91	-36.25	-4.26	0.93	6.33	52.75	▁▆▇▁▁
TG_nz.diff	1	-7.55	99.27	-1279.42	-30.67	-3.42	22.42	1146.49	▁▁▇▁▁
HbA1c_nz.diff	1	0.06	0.45	-4.51	-0.16	0.06	0.27	5.93	▁▁▇▁▁
step_mean.diff	1	248.10	1328.27	-4286.43	-442.90	68.12	687.88	10651.17	▁▇▁▁▁
weight_nz.b	1	68.45	12.38	37.62	60.23	68.06	75.84	125.84	▂▇▅▁▁
systolicBP_nz.b	1	126.01	16.05	81.63	114.44	127.11	136.04	209.34	▂▇▅▁▁
diastolicBP_nz.b	1	78.32	11.65	44.97	70.29	78.13	86.02	126.16	▁▇▇▂▁
ldl_c_nz.b	1	138.83	31.24	30.71	116.48	142.68	158.84	276.22	▁▅▇▁▁
hdl_c_nz.b	1	59.09	16.18	24.59	47.49	56.80	68.42	140.46	▃▇▂▁▁
TG_nz.b	1	139.89	108.49	23.75	76.56	110.50	169.93	1450.27	▇▁▁▁▁
HbA1c_nz.b	1	5.54	0.59	4.11	5.21	5.45	5.72	12.17	▇▂▁▁▁
weight_nz.f	1	68.53	12.35	38.57	60.55	67.86	75.87	124.70	▂▇▅▁▁
systolicBP_nz.f	1	124.66	15.29	82.37	114.66	123.93	133.82	204.01	▂▇▅▁▁
diastolicBP_nz.f	1	78.60	11.18	41.49	71.04	78.39	86.03	119.35	▁▅▇▃▁
ldl_c_nz.f	1	136.30	32.06	44.80	114.14	135.71	156.69	279.04	▂▇▇▁▁
hdl_c_nz.f	1	60.27	16.66	25.77	48.25	57.54	69.14	150.95	▅▇▂▁▁
TG_nz.f	1	132.34	104.76	20.33	73.02	108.35	158.94	1544.59	▇▁▁▁▁
HbA1c_nz.f	1	5.60	0.60	4.32	5.26	5.51	5.77	12.56	▇▁▁▁▁

data3_highrisk %>% head()

##     仮個人id gender age kencom登録年月 step.diff.quintile
## 1 0IliCoWCZV   male  31     2016-04-01                  3
## 2 PDnTXele4g   male  26     2016-04-01                  5
## 3 EPbHvE0cSI female  22     2016-05-01                  2
## 4 qBqHi7ri2W   male  42     2016-04-01                  2
## 5 TRZYorNdbw female  32     2016-05-01                  3
## 6 ULSKJQhwhT   male  25     2016-06-01                  1
##   step.diff.median.by.group weight_nz.diff systolicBP_nz.diff
## 1                  62.69344      0.7284926          -2.445457
## 2                1596.78921     -2.6122737          10.506455
## 3                -327.36152     -4.4093780           2.458827
## 4                -327.36152     -2.2889677           8.490237
## 5                  62.69344      8.3624345         -16.204654
## 6                -963.51562      3.7261152          28.878772
##   diastolicBP_nz.diff ldl_c_nz.diff hdl_c_nz.diff TG_nz.diff HbA1c_nz.diff
## 1           -2.542741    -22.601922      4.948159  -2.196534   -0.06833491
## 2            8.258655    -18.511202    -18.558318 104.989170   -0.08458658
## 3           -4.471324    -41.671288    -11.151447  25.578212    0.36740637
## 4           10.265643      4.338429     -8.284412  20.048275    0.36238565
## 5           -5.522645    -20.854080    -12.814424  74.910630   -0.28642599
## 6            3.430650      7.664912      6.415059  -2.612318   -0.06264475
##   step_mean.diff baseline健診受診年月 followup健診受診年月 weight_nz.b
## 1       143.7200           2015-05-01           2017-05-01    71.95757
## 2      1174.5297           2015-05-01           2017-05-01    70.95344
## 3      -466.6492           2015-05-01           2017-06-01    70.90351
## 4      -490.7139           2015-05-01           2017-06-01    70.25011
## 5       129.7318           2015-06-01           2017-06-01    71.35653
## 6      -635.0747           2015-06-01           2017-06-01    57.70602
##   systolicBP_nz.b diastolicBP_nz.b ldl_c_nz.b hdl_c_nz.b   TG_nz.b HbA1c_nz.b
## 1       133.84588         78.75107   118.8808   58.35234  67.63990   5.337701
## 2       101.71822         56.37320   155.5491   63.88320  72.98456   5.292819
## 3       115.42029         73.12555   165.4906   48.76714 111.67693   5.506758
## 4        93.26939         55.46555   152.4000   51.88232  99.73815   4.618830
## 5       112.29083         57.39880   142.6868   58.44698  77.56937   5.520969
## 6       139.42054         76.05343   127.1050   66.96992  59.32766   5.753736
##   weight_nz.f systolicBP_nz.f diastolicBP_nz.f ldl_c_nz.f hdl_c_nz.f   TG_nz.f
## 1    72.68606       131.40042         76.20833   96.27893   63.30050  65.44337
## 2    68.34116       112.22468         64.63186  137.03787   45.32488 177.97374
## 3    66.49414       117.87911         68.65423  123.81931   37.61569 137.25515
## 4    67.96114       101.75963         65.73120  156.73848   43.59791 119.78642
## 5    79.71897        96.08617         51.87616  121.83272   45.63256 152.48000
## 6    61.43213       168.29932         79.48408  134.76990   73.38498  56.71534
##   HbA1c_nz.f
## 1   5.269366
## 2   5.208233
## 3   5.874164
## 4   4.981216
## 5   5.234543
## 6   5.691091

ggplot(data3_highrisk,mapping = aes(x=age,fill=gender))+
  geom_histogram(binwidth=5, position="identity",alpha=0.5) +
  facet_wrap(~gender,scales = "free")+
  theme_bw()

data3_highrisk %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()

## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

data4_highrisk<-data3_highrisk %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.median.by.group, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n()))

## `summarise()` has grouped output by 'step.diff.median.by.group'. You can
## override using the `.groups` argument.

data4_highrisk %>% head()

## # A tibble: 6 × 5
## # Groups:   step.diff.median.by.group [1]
##   step.diff.median.by.group measure_type           mean      sd     se
##                       <dbl> <chr>                 <dbl>   <dbl>  <dbl>
## 1                     -964. HbA1c_nz.diff        0.0698   0.538 0.0244
## 2                     -964. TG_nz.diff          -6.20   110.    5.01  
## 3                     -964. diastolicBP_nz.diff  0.969    9.61  0.436 
## 4                     -964. hdl_c_nz.diff        0.266    8.90  0.404 
## 5                     -964. ldl_c_nz.diff       -0.860   27.0   1.23  
## 6                     -964. systolicBP_nz.diff   0.146   14.3   0.650

#dBP
DBP_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="diastolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>%
  filter(row.names(.)=="step.diff.median.by.group") %>%
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>%
  mutate(term="dBP_diff",.before=Estimate)

#sBP
SBP_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="systolicBP_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="sBP_diff",.before=Estimate)

#weight
weight_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="weight_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="weight_diff",.before=Estimate)

#HbA1c
HbA1c_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="HbA1c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HbA1c_diff",.before=Estimate)

#TG
TG_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="TG_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="TG_diff",.before=Estimate)

#HDL
HDL_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="hdl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="HDL_diff",.before=Estimate)

#LDL
LDL_highrisk<-
  data4_highrisk %>% 
  filter(measure_type=="ldl_c_nz.diff") %>%
  lm(mean~step.diff.median.by.group,data=.) %>% 
  summary() %>% 
  .$coefficients %>% 
  as.data.frame() %>% 
  filter(row.names(.)=="step.diff.median.by.group") %>% 
  dplyr::select(Estimate,p_value="Pr(>|t|)") %>% 
  mutate(term="LDL_diff",.before=Estimate)

data5_highrisk<-bind_rows(DBP_highrisk,SBP_highrisk,weight_highrisk,HbA1c_highrisk,TG_highrisk,HDL_highrisk,LDL_highrisk)
data5_highrisk

##                                      term      Estimate     p_value
## step.diff.median.by.group...1    dBP_diff -6.197701e-04 0.019402126
## step.diff.median.by.group...2    sBP_diff -9.449150e-04 0.018892579
## step.diff.median.by.group...3 weight_diff -5.140357e-04 0.000219201
## step.diff.median.by.group...4  HbA1c_diff -1.941723e-05 0.015689553
## step.diff.median.by.group...5     TG_diff -1.804097e-03 0.589938244
## step.diff.median.by.group...6    HDL_diff  9.991996e-04 0.013094852
## step.diff.median.by.group...7    LDL_diff -5.655664e-04 0.444863251

心血管リスク因子を一つでも持つ者のdBP,sBP,weight,HbA1c,HDLについては有意差あり

data5_highrisk %>% 
  mutate("p<0.05"=if_else(p_value<0.05,"*",""))

##                                      term      Estimate     p_value p<0.05
## step.diff.median.by.group...1    dBP_diff -6.197701e-04 0.019402126      *
## step.diff.median.by.group...2    sBP_diff -9.449150e-04 0.018892579      *
## step.diff.median.by.group...3 weight_diff -5.140357e-04 0.000219201      *
## step.diff.median.by.group...4  HbA1c_diff -1.941723e-05 0.015689553      *
## step.diff.median.by.group...5     TG_diff -1.804097e-03 0.589938244       
## step.diff.median.by.group...6    HDL_diff  9.991996e-04 0.013094852      *
## step.diff.median.by.group...7    LDL_diff -5.655664e-04 0.444863251

data3でベースライン時に心血管ハイリスク患者(HT,DL,DM:３つある)に限定して調べる

HT＝sBP130以上、DL＝LDL:140以上orHDL:40未満orTG:175以上、DM＝6.5

data3_high<-data3 %>% 
  filter(systolicBP_nz.b>=130) %>% 
  filter(ldl_c_nz.b>=140 | hdl_c_nz.b<40 | TG_nz.b>=175) %>% 
  filter(HbA1c_nz.b>=6.5)

data3_high %>% nrow()

## [1] 31

data3_high %>% skim()

Data summary
Name	Piped data
Number of rows	31
Number of columns	30
_______________________
Column type frequency:
character	5
numeric	25
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
仮個人id	1	10	10	31
gender	1	4	6	2
kencom登録年月	1	10	10	9
baseline健診受診年月	1	10	10	12
followup健診受診年月	1	10	10	8

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
age	1	45.00	12.80	23.00	35.50	43.00	53.00	71.00	▅▇▆▅▃
step.diff.quintile	1	3.00	1.71	1.00	1.00	3.00	5.00	5.00	▇▃▂▃▇
step.diff.median.by.group	1	234.74	1073.62	-963.52	-963.52	62.69	1596.79	1596.79	▇▃▆▁▇
weight_nz.diff	1	-2.81	4.15	-13.73	-4.92	-2.81	-0.24	6.22	▁▅▇▇▂
systolicBP_nz.diff	1	-8.50	18.52	-51.36	-15.55	-9.64	5.73	25.19	▂▂▇▅▂
diastolicBP_nz.diff	1	-6.37	10.96	-24.31	-14.70	-4.82	0.37	14.28	▅▆▇▃▅
ldl_c_nz.diff	1	-9.39	51.50	-143.84	-25.34	-4.79	18.95	88.81	▁▂▇▇▂
hdl_c_nz.diff	1	2.26	9.68	-20.54	-2.14	2.78	7.66	24.60	▂▂▇▃▁
TG_nz.diff	1	-32.13	269.77	-504.79	-114.13	-28.89	9.30	1146.49	▂▇▁▁▁
HbA1c_nz.diff	1	-0.06	2.04	-4.51	-1.02	0.08	0.44	5.93	▂▃▇▁▁
step_mean.diff	1	164.75	1371.49	-2638.22	-637.24	87.20	1128.13	3017.17	▂▇▅▆▂
weight_nz.b	1	82.01	15.90	59.30	70.81	80.98	88.85	120.46	▇▅▆▂▂
systolicBP_nz.b	1	144.86	15.84	130.98	134.71	140.20	146.54	192.79	▇▃▁▁▁
diastolicBP_nz.b	1	89.44	9.31	68.94	85.10	89.16	91.76	111.44	▂▂▇▁▂
ldl_c_nz.b	1	137.93	33.37	67.33	109.47	141.31	162.84	201.55	▂▅▇▅▃
hdl_c_nz.b	1	51.16	14.73	32.33	41.78	45.88	57.57	89.24	▇▇▃▂▂
TG_nz.b	1	240.11	157.52	67.15	126.42	183.14	292.91	611.58	▇▅▁▁▂
HbA1c_nz.b	1	7.62	1.42	6.53	6.72	6.97	7.80	12.17	▇▂▁▁▁
weight_nz.f	1	79.20	14.76	59.26	69.45	78.01	87.70	114.79	▇▇▇▁▂
systolicBP_nz.f	1	136.36	14.70	106.65	126.08	136.07	146.41	167.78	▂▇▇▆▃
diastolicBP_nz.f	1	83.06	9.38	61.77	77.04	82.42	89.67	100.17	▂▃▇▆▅
ldl_c_nz.f	1	128.55	36.40	53.48	102.15	131.36	156.85	204.44	▂▇▇▇▂
hdl_c_nz.f	1	53.41	13.46	25.77	45.65	51.13	60.04	92.02	▂▇▅▂▁
TG_nz.f	1	207.98	253.93	70.63	109.11	136.71	199.15	1483.99	▇▁▁▁▁
HbA1c_nz.f	1	7.55	1.45	5.61	6.60	7.10	8.03	12.56	▇▇▁▁▁

ggplot(data3_high,mapping = aes(x=age,fill=gender))+
  geom_histogram(binwidth=5, position="identity",alpha=0.5) +
  facet_wrap(~gender,scales = "free")+
  theme_bw()

data3_high %>% 
  pivot_longer(
    cols = c(weight_nz.diff, systolicBP_nz.diff, diastolicBP_nz.diff,
             ldl_c_nz.diff, hdl_c_nz.diff, TG_nz.diff, HbA1c_nz.diff),
    names_to = "measure_type",
    values_to = "diff_value"
  ) %>%
  group_by(step.diff.quintile, measure_type) %>%
  summarise(mean=mean(diff_value,na.rm=T),
                             sd=sd(diff_value,na.rm=T),
                             se=sd / sqrt(n())) %>%
  ggplot(mapping = aes(x = step.diff.quintile, y =mean)) +
  geom_bar(stat = "identity",position = "dodge") +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=0.2)+
  facet_wrap(~measure_type,scales = "free")+
  theme_classic()

## `summarise()` has grouped output by 'step.diff.quintile'. You can override
## using the `.groups` argument.

DeSC

阿部

2024-01-09

R Markdown

分析1-1

集団の平均値を算出

ヒストグラムを作成

t検定実施。結果有意差あり→有意に増加している

分析1-2

平日と週末における歩数の違い

分析２-1

歩数変化量と健診検査値変化量のグラフ

分析２-2

全体のsBP,weight,HbA1c,HDLについては有意差あり

分析追加

性差での違いを調べる

男性データのみを抽出

男性データではdBP_diff、weight_diff、HDL_diffについて有意差あり

女性データのみを抽出

女性データではdBP_diff、weight_diff、HDL_diffについて有意差あり

心血管リスク因子(HT,DL,DM)一つでも該当する者を対象とする

心血管リスク因子を一つでも持つ者のdBP,sBP,weight,HbA1c,HDLについては有意差あり

data3でベースライン時に心血管ハイリスク患者(HT,DL,DM:３つある)に限定して調べる