library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
library(ggthemes)
library(tidyr)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## √ tibble 2.1.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## √ purrr 0.3.3
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lattice)
library(Ecdat)
## Loading required package: Ecfun
##
## Attaching package: 'Ecfun'
## The following object is masked from 'package:base':
##
## sign
##
## Attaching package: 'Ecdat'
## The following object is masked from 'package:datasets':
##
## Orange
library(gapminder)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(grid)
dta <-read.csv("C:/tmp/S/run1.csv",header = T)
names(dta)[c(1,1)] <- c("RANDID")
dta <- na.omit(dta)
dtanew <- dta %>%
filter(PERIOD == "1" & PREVCHD == "0")
dtanew$TIMECHDyear<-dtanew$TIMECHD/365.25
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese")))
dtaA<- knitr::kable(anova(lm(dtanew$TIMECHDyear ~ dtanew$BMIff, data=dtanew)))
dtanew <- dtanew %>% mutate(BMIF=cut(BMI,
breaks=quantile(BMI,
breaks=c(0,18.5, 24, 27,100)),
label=c("underweight", "normal", "overweight","obese"),
ordered=T,
include.lowest=T))
dtanew$ANYCHD<- as.numeric(dtanew$ANYCHD)
dtanew$ANYCHDf<-ifelse(dtanew$ANYCHD>= 0.5, "Sick", "Health") #ANYCHD分為大於等於.5(=Sick),小於(=Health)
dtanew$SYSBP<- as.numeric(dtanew$SYSBP)
dtanew$CURSMOKEf<-ifelse(dtanew$CURSMOKE>= 0.5, "No", "Smoke")
dtanew$EDUC<- as.numeric(dtanew$EDUC)
dtanew$CIGPDAY<- as.numeric(dtanew$CIGPDAY)
dtanew$DIABP<- as.numeric(dtanew$DIABP)
dtanew$CIGPDAYf <- with(dtanew, cut(CIGPDAY, ordered=T, breaks=c(0, 10, 30, 60),labels=c("null", "lowsmoke","heavysmoke")))
dtanew$SEXf<-ifelse(dtanew$SEX>= 1.5, "F", "M") #SEX分為大於等於1.5(=F),小於(=M)
#Outline
##Introduction
##Method
##Results
##Discussion
##Conclusion
#Introduction
####本篇探討冠心病重要影響成因
####已知終身無法改變:年歲增高、性別。
####尚可改變的危險因子:高血壓、糖尿病、高脂血症、肥胖症、抽菸。
####期望透過分析找出哪些因素與冠心病具有關聯
#Method
###統計分析:
####CHD & BMI:Anova
####preCHD & EDU & Glucose & Heartbeat et.,al: Correlation
####SYSBP & ANYCHD:T-test
###視覺化圖:
####lollipop、Correlation plot、Extension coefficients plot、effects plot、Grouping、Boxplot、Error bars、Grouping conditioning、trends region
##BMI & AGE Grouping color relationship
ggplot(dtanew, aes(BMI, AGE,
color=BMIff)) +
geom_point()+
stat_smooth(aes(group=1),
method='loess',
formula= y ~ x,
col='orange', se=F)+
stat_smooth(method='lm',
formula=y ~ x, se=F) +
scale_color_manual(values=c('red',
'steelblue',
'darkgreen',
'black'),
guide=guide_legend(title=NULL))+
labs(x='BMI', y='AGE',
title='Relationship between BMI and AGE')+
theme_ipsum() +
theme(legend.position='bottom')+
theme(text=element_text(size=15))
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#Anova BMI & TIMECHD
dtanew$TIMECHDyear<-dtanew$TIMECHD/365.25
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese")))
dtaA<- knitr::kable(anova(lm(dtanew$TIMECHDyear ~ dtanew$BMIff, data=dtanew)))
table(dtaA)
## dtaA
## ------------- ----- ----------- ---------- --------- -------
## 1
## Df Sum Sq Mean Sq F value Pr(>F)
## 1
## dtanew$BMIff 3 2938.789 979.59649 20.11202 0
## 1
## Residuals 3834 186742.720 48.70702 NA NA
## 1
##DIABP by CIGPDAYf & BMIff - Dotplot lollipop plot
p <- aggregate(DIABP ~ CIGPDAYf + BMIff ,
data=dtanew,
FUN=mean) %>%
rename(m_DIABP=DIABP) %>%
unite(catg, CIGPDAYf, BMIff) %>%
ggplot( ) +
aes(x=m_DIABP,
y=reorder(catg, m_DIABP)) +
geom_point(size=rel(3)) +
geom_segment(aes(xend=mean(dtanew$DIABP),
yend=reorder(catg, m_DIABP)))+
labs(x="Mean DIABP",
y="CIGPDAY, BMI ") +
theme_minimal()+
theme(text=element_text(size=15))
p

#Correlation plot
dtar <-dtanew[ ,c(5,6,8,9,12,13,14,15,28)]
dtar %>% select_if(is.numeric) %>% na.omit() %>% cor() %>%
ggcorrplot::ggcorrplot(., type = 'lower', lab=T, hc.order = F, hc.method = 'centroid',colors = c('steelblue3', 'white', 'deeppink3'), sig.level=.2, insig = 'pch', pch.col = 'grey40') +
theme(text=element_text(size=15))
## Warning in cor(.): the standard deviation is zero

##DIABP- Extension coefficients plot
library(broom)
dtanew$SEXf<-ifelse(dtanew$SEX>= 1.5, "female", "male") #SEX分為大於等於1.5(=F),小於(=M)
dtanew$EDUCf <- with(dtanew, cut(EDUC, ordered=T, breaks=c(0 ,1, 2, 3, 4),labels=c("0_11yr", "high_Sc", "college","BS_BA")))
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese")))
dtanew$CURSMOKEf<-ifelse(dtanew$CURSMOKE>= 0.5, "null", "smoke")
dtanew$ANYCHDf<-ifelse(dtanew$ANYCHD>= 0.5, "CHD", "noCHD")#ANYCHD分為大於等於.5(=Sick),小於(=Health)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
m1 <- lm(DIABP ~ EDUCf + SEXf + BMIff + CURSMOKEf + ANYCHDf + EDUCf:SEXf + BMIff :CURSMOKEf:ANYCHDf, data=dtanew)
ggcoef(m1,exclude_intercept=FALSE, sort=NULL) +theme_minimal()

##SYSBP- Extension coefficients plot
library(broom)
dtanew$SEXf<-ifelse(dtanew$SEX>= 1.5, "female", "male") #SEX分為大於等於1.5(=F),小於(=M)
dtanew$EDUCf <- with(dtanew, cut(EDUC, ordered=T, breaks=c(0 ,1, 2, 3, 4),labels=c("0_11yr", "high_Sc", "college","BS_BA")))
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese")))
dtanew$CURSMOKEf<-ifelse(dtanew$CURSMOKE>= 0.5, "null", "smoke")
dtanew$ANYCHDf<-ifelse(dtanew$ANYCHD>= 0.5, "CHD", "noCHD")#ANYCHD分為大於等於.5(=Sick),小於(=Health)
library(GGally)
m2 <- lm(SYSBP ~ EDUCf + SEXf + BMIff + CURSMOKEf + ANYCHDf + EDUCf:SEXf + BMIff :CURSMOKEf:ANYCHDf, data=dtanew)
ggcoef(m2,exclude_intercept=FALSE, sort=NULL) +theme_minimal()

##DIABP- Extension - effects plot
library(ggeffects)
m3 <- lm(DIABP ~ CIGPDAYf + BMIff + CIGPDAYf:BMIff, data=dtanew)
dta_m3 <- ggpredict(m3,terms = ~ CIGPDAYf + BMIff)
plot(dta_m3) +
labs(y="DIABP",
x="SMOKE")+
theme(text=element_text(size=15))

t.test(dtanew$SYSBP~dtanew$ANYCHDf, var.equal=TRUE)
##
## Two Sample t-test
##
## data: dtanew$SYSBP by dtanew$ANYCHDf
## t = 11.625, df = 3836, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 7.832767 11.010630
## sample estimates:
## mean in group CHD mean in group noCHD
## 139.5231 130.1014
###BMI & ANYCHD Grouping color relationship
ggplot(dtanew, aes(BMI, ANYCHD,
color=BMIF)) +
geom_point()+
stat_smooth(aes(group=1),
method='loess',
formula= y ~ x,
col='orange', se=F)+
stat_smooth(method='lm',
formula=y ~ x, se=F) +
scale_color_manual(values=c('red',
'steelblue',
'darkgreen',
'black'),
guide=guide_legend(title=NULL))+
labs(x='BMI', y='ANYCHD',
title='Relationship between BMI and ANYCHD')+
theme_ipsum() +
theme(legend.position='bottom')+
theme(text=element_text(size=15))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

###GLUCOSE & ANYCHD Grouping color relationship
ggplot(dtanew,aes(GLUCOSE,ANYCHD ,
color=ANYCHDf))+
geom_point()+
stat_smooth(aes(group=1),
method='loess',
formula= y ~ x,
col='green3', se=F)+
stat_smooth(method='lm',
formula=y ~ x, se=F) +
scale_color_manual(values=c('cyan',
'red'),
guide=guide_legend(title=NULL))+
labs(x='GLUCOSE', y='ANYCHD',
title='Relationship between GLUCOSE and ANYCHD')+
theme_ipsum() +
theme(legend.position='bottom')+
theme(text=element_text(size=15))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

##BMI & SBP:Boxplot
qplot( BMIff,SYSBP, colour = factor(BMIff), data = dtanew, geom = 'boxplot') +
labs(colour = 'BMI分組') +xlab("BMI分組") +ylab("SBP")+
theme(text=element_text(size=15))

###BMI ,ANYCHD → SYSBP Error bars for the means
pd <- position_dodge(.3)
p3 <- dtanew %>% group_by(BMIff, ANYCHDf) %>%
summarize(SYSBP_m=mean(SYSBP),
SYSBP_se=sd(SYSBP)/sqrt(n())) %>%
ggplot() +
aes(ANYCHDf, SYSBP_m,
group=BMIff,
shape=BMIff) +
geom_errorbar(aes(ymin=SYSBP_m - SYSBP_se,
ymax=SYSBP_m + SYSBP_se),
width=.2, size=.3,
position=pd) +
#不要線geom_line(position=pd,
#linetype='dotted') +
geom_point(position=pd,
size=rel(3)) +
scale_shape(guide=guide_legend(title=NULL)) +
labs(x="BMIF", y="SYSBP") +
theme_ipsum() +
theme(legend.position=c(.9, .8))+
theme(text=element_text(size=15))
p4 <- p3 +
aes(BMIff,
SYSBP_m,
group=ANYCHDf,
color=ANYCHDf,
shape=NULL) +
scale_color_manual(values=c('red',
'steelblue',
'darkgreen',
'black')) +
scale_shape(guide=guide_legend(title=NULL,
reverse=T))+
theme_ipsum() +
theme(text = element_text(family = '蘋方-繁 標準體'), legend.position = "none")+
theme(text = element_text(size = 16),
legend.position = c(0.9,0.1)) +
labs(x="BMI levels",
y="Mean SYSBP")
## Scale for 'shape' is already present. Adding another scale for 'shape', which
## will replace the existing scale.
p4
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

##BMI & CIGPDAY → SYSBP Grouping versus conditioning
dtanew$CIGPDAYf<- as.character(dtanew$CIGPDAYf)
p1 <- ggplot(dtanew,
aes(BMI, SYSBP,
color=CIGPDAYf)) +##只能是名稱,不接受數字
stat_smooth(method="lm",
formula=y ~ x, se=F) +
geom_point() +
coord_fixed() +
scale_color_manual(values=c('lightgreen',
'green',
'darkgreen'),
guide=guide_legend(title=NULL)) +
labs(x="BMI", y="SYSBP") +
theme_bw() +
theme(legend.position='top')+
theme(text=element_text(size=15))
p2 <- ggplot(dtanew,
aes(BMI, SYSBP)) +
stat_smooth(method="lm",
formula=y ~ x) +
geom_point(shape=20) +
facet_grid(CIGPDAYf ~ .) +
labs(x="BMI", y="SYSBP") +
theme_bw()+
theme(text=element_text(size=15))
##BMI & EDU by CURSMOKE Similar trends by Region
dtanew$EDUCf <- with(dtanew, cut(EDUC, ordered=T, breaks=c(0 ,1, 2, 3, 4),labels=c("0_11yr", "high_Sc", "college","BS_BA")))
xyplot(GLUCOSE~EDUCf|ANYCHDf,data=dtanew,
type=c("p","g","r"),layout=c(3,1),
xlab="EDU",
ylab="GLUCOSE")

##EDU & GLUCOSE by ANYCHD = 1:Boxplot
dtanew <- na.omit(dtanew)
dtaCHD <- dtanew %>%
filter(PERIOD == "1" & PREVCHD == "0" & ANYCHD == "1")
qplot( EDUCf,GLUCOSE, colour = factor(EDUCf), data = dtaCHD, geom = 'boxplot') +
labs(colour = 'EDUC分組') +xlab("EDUC分組") +ylab("GLUCOSE")+
theme(text=element_text(size=15))

##CIGPDAY & DIABP by BMI Similar trends by Region
p1 <- ggplot(dtanew,
aes(CIGPDAY,DIABP ,
color=BMIff)) +##只能是名稱,不接受數字
stat_smooth(method="lm",
formula=y ~ x, se=F) +
geom_point() +
coord_fixed() +
scale_color_manual(values=c('red',
'steelblue',
'darkgreen',
'black'),
guide=guide_legend(title=NULL)) +
labs(x="CIGPDAY", y="DIABP") +
theme_bw() +
theme(legend.position='top')+
theme(text=element_text(size=12))
p2 <- ggplot(dtanew,
aes(CIGPDAY,DIABP)) +
stat_smooth(method="lm",
formula=y ~ x) +
geom_point(shape=20) +
facet_grid(BMIff ~ .) +
labs(x="CIGPDAY", y="DIABP") +
theme_bw()+
theme(text=element_text(size=12))
grid.arrange(p1, p2, ncol=2)

#Conclusion
####冠心病與血壓、抽菸次數、BMI、血糖、教育程度具有高相關
####
####