library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2) 
library(viridis) 
## Loading required package: viridisLite
library(ggthemes)
library(tidyr)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## √ tibble  2.1.3     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## √ purrr   0.3.3
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lattice)
library(Ecdat)
## Loading required package: Ecfun
## 
## Attaching package: 'Ecfun'
## The following object is masked from 'package:base':
## 
##     sign
## 
## Attaching package: 'Ecdat'
## The following object is masked from 'package:datasets':
## 
##     Orange
library(gapminder)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(grid)
dta <-read.csv("C:/tmp/S/run1.csv",header = T)
names(dta)[c(1,1)] <- c("RANDID")
dta <- na.omit(dta)
dtanew <- dta %>% 
  filter(PERIOD == "1" & PREVCHD == "0") 

dtanew$TIMECHDyear<-dtanew$TIMECHD/365.25
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese")))  
dtaA<- knitr::kable(anova(lm(dtanew$TIMECHDyear ~ dtanew$BMIff, data=dtanew)))


dtanew <- dtanew %>% mutate(BMIF=cut(BMI, 
                                     breaks=quantile(BMI, 
                                                     breaks=c(0,18.5, 24, 27,100)),
                                     label=c("underweight", "normal", "overweight","obese"), 
                                     ordered=T, 
                                     include.lowest=T))

dtanew$ANYCHD<- as.numeric(dtanew$ANYCHD)
dtanew$ANYCHDf<-ifelse(dtanew$ANYCHD>= 0.5, "Sick", "Health") #ANYCHD分為大於等於.5(=Sick),小於(=Health)
dtanew$SYSBP<- as.numeric(dtanew$SYSBP)
dtanew$CURSMOKEf<-ifelse(dtanew$CURSMOKE>= 0.5, "No", "Smoke")
dtanew$EDUC<- as.numeric(dtanew$EDUC)
dtanew$CIGPDAY<- as.numeric(dtanew$CIGPDAY)
dtanew$DIABP<- as.numeric(dtanew$DIABP)
dtanew$CIGPDAYf <- with(dtanew, cut(CIGPDAY, ordered=T, breaks=c(0, 10, 30, 60),labels=c("null", "lowsmoke","heavysmoke"))) 
dtanew$SEXf<-ifelse(dtanew$SEX>= 1.5, "F", "M") #SEX分為大於等於1.5(=F),小於(=M)





  #Outline
  ##Introduction
  ##Method
  ##Results
  ##Discussion
  ##Conclusion
  

  #Introduction
  ####本篇探討冠心病重要影響成因
  ####已知終身無法改變:年歲增高、性別。
  ####尚可改變的危險因子:高血壓、糖尿病、高脂血症、肥胖症、抽菸。
  ####期望透過分析找出哪些因素與冠心病具有關聯
  
  #Method
  ###統計分析:
  ####CHD & BMI:Anova
  ####preCHD & EDU & Glucose & Heartbeat et.,al: Correlation
  ####SYSBP & ANYCHD:T-test
  ###視覺化圖:
  ####lollipop、Correlation plot、Extension coefficients plot、effects plot、Grouping、Boxplot、Error bars、Grouping  conditioning、trends region

  
##BMI & AGE Grouping color relationship
ggplot(dtanew, aes(BMI, AGE, 
                   color=BMIff)) +
  geom_point()+
  stat_smooth(aes(group=1),
              method='loess',
              formula= y ~ x, 
              col='orange', se=F)+
  stat_smooth(method='lm',
              formula=y ~ x, se=F) +
  scale_color_manual(values=c('red',
                              'steelblue',
                              'darkgreen',
                              'black'),
                     guide=guide_legend(title=NULL))+
  labs(x='BMI', y='AGE', 
       title='Relationship between BMI and AGE')+
  theme_ipsum() +
  theme(legend.position='bottom')+
  theme(text=element_text(size=15))
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#Anova BMI & TIMECHD
dtanew$TIMECHDyear<-dtanew$TIMECHD/365.25
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese")))  
dtaA<- knitr::kable(anova(lm(dtanew$TIMECHDyear ~ dtanew$BMIff, data=dtanew)))

table(dtaA)
## dtaA
## -------------  -----  -----------  ----------  ---------  ------- 
##                                                                 1 
##                   Df       Sum Sq     Mean Sq    F value   Pr(>F) 
##                                                                 1 
## dtanew$BMIff       3     2938.789   979.59649   20.11202        0 
##                                                                 1 
## Residuals       3834   186742.720    48.70702         NA       NA 
##                                                                 1
##DIABP by CIGPDAYf & BMIff - Dotplot lollipop plot
p <- aggregate(DIABP ~ CIGPDAYf + BMIff , 
               data=dtanew,
               FUN=mean) %>%
  rename(m_DIABP=DIABP) %>%
  unite(catg, CIGPDAYf, BMIff) %>%
  ggplot( ) + 
  aes(x=m_DIABP, 
      y=reorder(catg, m_DIABP)) +
  geom_point(size=rel(3)) +
  geom_segment(aes(xend=mean(dtanew$DIABP),
                   yend=reorder(catg, m_DIABP)))+
  labs(x="Mean DIABP", 
       y="CIGPDAY, BMI ") +
  theme_minimal()+
  theme(text=element_text(size=15))
p

#Correlation plot

dtar <-dtanew[ ,c(5,6,8,9,12,13,14,15,28)]
dtar %>% select_if(is.numeric) %>% na.omit() %>% cor() %>%
  ggcorrplot::ggcorrplot(., type = 'lower', lab=T, hc.order = F, hc.method = 'centroid',colors = c('steelblue3', 'white', 'deeppink3'), sig.level=.2, insig = 'pch', pch.col = 'grey40') +
  theme(text=element_text(size=15))
## Warning in cor(.): the standard deviation is zero

##DIABP-  Extension coefficients plot
library(broom)
dtanew$SEXf<-ifelse(dtanew$SEX>= 1.5, "female", "male") #SEX分為大於等於1.5(=F),小於(=M)
dtanew$EDUCf <- with(dtanew, cut(EDUC, ordered=T, breaks=c(0 ,1, 2, 3, 4),labels=c("0_11yr", "high_Sc", "college","BS_BA")))  
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese"))) 
dtanew$CURSMOKEf<-ifelse(dtanew$CURSMOKE>= 0.5, "null", "smoke")
dtanew$ANYCHDf<-ifelse(dtanew$ANYCHD>= 0.5, "CHD", "noCHD")#ANYCHD分為大於等於.5(=Sick),小於(=Health)

library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
m1 <- lm(DIABP ~ EDUCf + SEXf + BMIff + CURSMOKEf + ANYCHDf + EDUCf:SEXf + BMIff :CURSMOKEf:ANYCHDf, data=dtanew)
ggcoef(m1,exclude_intercept=FALSE, sort=NULL) +theme_minimal()

##SYSBP- Extension coefficients plot
library(broom)
dtanew$SEXf<-ifelse(dtanew$SEX>= 1.5, "female", "male") #SEX分為大於等於1.5(=F),小於(=M)
dtanew$EDUCf <- with(dtanew, cut(EDUC, ordered=T, breaks=c(0 ,1, 2, 3, 4),labels=c("0_11yr", "high_Sc", "college","BS_BA")))  
dtanew$BMIff <- with(dtanew, cut(BMI, ordered=T, breaks=c(0,18.5, 24, 27,100),labels=c("underweight", "normal", "overweight","obese"))) 
dtanew$CURSMOKEf<-ifelse(dtanew$CURSMOKE>= 0.5, "null", "smoke")
dtanew$ANYCHDf<-ifelse(dtanew$ANYCHD>= 0.5, "CHD", "noCHD")#ANYCHD分為大於等於.5(=Sick),小於(=Health)

library(GGally)
m2 <- lm(SYSBP ~ EDUCf + SEXf + BMIff + CURSMOKEf + ANYCHDf + EDUCf:SEXf + BMIff :CURSMOKEf:ANYCHDf, data=dtanew)
ggcoef(m2,exclude_intercept=FALSE, sort=NULL) +theme_minimal()

##DIABP- Extension - effects plot
library(ggeffects)

m3 <- lm(DIABP ~ CIGPDAYf + BMIff  +  CIGPDAYf:BMIff, data=dtanew)

dta_m3 <- ggpredict(m3,terms = ~ CIGPDAYf + BMIff)
plot(dta_m3) + 
  labs(y="DIABP", 
       x="SMOKE")+
  theme(text=element_text(size=15))

t.test(dtanew$SYSBP~dtanew$ANYCHDf, var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  dtanew$SYSBP by dtanew$ANYCHDf
## t = 11.625, df = 3836, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   7.832767 11.010630
## sample estimates:
##   mean in group CHD mean in group noCHD 
##            139.5231            130.1014
###BMI & ANYCHD Grouping color relationship

ggplot(dtanew, aes(BMI, ANYCHD, 
                   color=BMIF)) +
  geom_point()+
  stat_smooth(aes(group=1),
              method='loess',
              formula= y ~ x, 
              col='orange', se=F)+
  stat_smooth(method='lm',
              formula=y ~ x, se=F) +
  scale_color_manual(values=c('red',
                              'steelblue',
                              'darkgreen',
                              'black'),
                     guide=guide_legend(title=NULL))+
  labs(x='BMI', y='ANYCHD', 
       title='Relationship between BMI and ANYCHD')+
  theme_ipsum() +
  theme(legend.position='bottom')+
  theme(text=element_text(size=15))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

###GLUCOSE & ANYCHD Grouping color relationship
ggplot(dtanew,aes(GLUCOSE,ANYCHD , 
                  color=ANYCHDf))+
  geom_point()+
  stat_smooth(aes(group=1),
              method='loess',
              formula= y ~ x, 
              col='green3', se=F)+
  stat_smooth(method='lm',
              formula=y ~ x, se=F) +
  scale_color_manual(values=c('cyan',
                              'red'),
                     guide=guide_legend(title=NULL))+
  labs(x='GLUCOSE', y='ANYCHD', 
       title='Relationship between GLUCOSE and ANYCHD')+
  theme_ipsum() +
  theme(legend.position='bottom')+
  theme(text=element_text(size=15))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

##BMI & SBP:Boxplot
qplot( BMIff,SYSBP, colour = factor(BMIff), data = dtanew, geom = 'boxplot') + 
  labs(colour = 'BMI分組') +xlab("BMI分組") +ylab("SBP")+
  theme(text=element_text(size=15)) 

###BMI ,ANYCHD → SYSBP Error bars for the means
pd <- position_dodge(.3)
p3 <- dtanew  %>% group_by(BMIff, ANYCHDf) %>%
  summarize(SYSBP_m=mean(SYSBP), 
            SYSBP_se=sd(SYSBP)/sqrt(n())) %>%
  ggplot() + 
  aes(ANYCHDf, SYSBP_m, 
      group=BMIff, 
      shape=BMIff) +
  geom_errorbar(aes(ymin=SYSBP_m - SYSBP_se,
                    ymax=SYSBP_m + SYSBP_se),
                width=.2, size=.3, 
                position=pd) +
  #不要線geom_line(position=pd, 
  #linetype='dotted') +
  geom_point(position=pd, 
             size=rel(3)) +
  scale_shape(guide=guide_legend(title=NULL)) +
  labs(x="BMIF", y="SYSBP") +
  theme_ipsum() +
  theme(legend.position=c(.9, .8))+
  theme(text=element_text(size=15))

p4 <- p3 +
  aes(BMIff, 
      SYSBP_m, 
      group=ANYCHDf, 
      color=ANYCHDf, 
      shape=NULL) +
  scale_color_manual(values=c('red',
                              'steelblue',
                              'darkgreen',
                              'black')) +
  scale_shape(guide=guide_legend(title=NULL, 
                                 reverse=T))+
  theme_ipsum() +
  theme(text = element_text(family = '蘋方-繁 標準體'), legend.position = "none")+
  theme(text = element_text(size = 16),
        legend.position = c(0.9,0.1)) +
  labs(x="BMI levels", 
       y="Mean SYSBP") 
## Scale for 'shape' is already present. Adding another scale for 'shape', which
## will replace the existing scale.
p4
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

##BMI & CIGPDAY → SYSBP Grouping versus conditioning
dtanew$CIGPDAYf<- as.character(dtanew$CIGPDAYf)
p1 <- ggplot(dtanew, 
             aes(BMI, SYSBP, 
                 color=CIGPDAYf)) +##只能是名稱,不接受數字
  stat_smooth(method="lm", 
              formula=y ~ x, se=F) +
  geom_point() +
  coord_fixed() +
  scale_color_manual(values=c('lightgreen',
                              'green',
                              'darkgreen'),
                     guide=guide_legend(title=NULL)) +
  labs(x="BMI", y="SYSBP") +
  theme_bw() +
  theme(legend.position='top')+
  theme(text=element_text(size=15))
p2 <- ggplot(dtanew, 
             aes(BMI, SYSBP)) +
  stat_smooth(method="lm", 
              formula=y ~ x) +
  geom_point(shape=20) +
  facet_grid(CIGPDAYf ~ .) +
  labs(x="BMI", y="SYSBP") +
  theme_bw()+
  theme(text=element_text(size=15))

##BMI & EDU by CURSMOKE Similar trends by Region
dtanew$EDUCf <- with(dtanew, cut(EDUC, ordered=T, breaks=c(0 ,1, 2, 3, 4),labels=c("0_11yr", "high_Sc", "college","BS_BA")))  

xyplot(GLUCOSE~EDUCf|ANYCHDf,data=dtanew,
       type=c("p","g","r"),layout=c(3,1),
       xlab="EDU",
       ylab="GLUCOSE")

##EDU & GLUCOSE by ANYCHD = 1:Boxplot
dtanew <- na.omit(dtanew)
dtaCHD <- dtanew %>% 
  filter(PERIOD == "1" & PREVCHD == "0" & ANYCHD == "1") 
qplot( EDUCf,GLUCOSE, colour = factor(EDUCf), data = dtaCHD, geom = 'boxplot') + 
  labs(colour = 'EDUC分組') +xlab("EDUC分組") +ylab("GLUCOSE")+
  theme(text=element_text(size=15)) 

##CIGPDAY & DIABP by BMI Similar trends by Region
p1 <- ggplot(dtanew, 
             aes(CIGPDAY,DIABP , 
                 color=BMIff)) +##只能是名稱,不接受數字
  stat_smooth(method="lm", 
              formula=y ~ x, se=F) +
  geom_point() +
  coord_fixed() +
  scale_color_manual(values=c('red',
                              'steelblue',
                              'darkgreen',
                              'black'),
                     guide=guide_legend(title=NULL)) +
  labs(x="CIGPDAY", y="DIABP") +
  theme_bw() +
  theme(legend.position='top')+
  theme(text=element_text(size=12))
p2 <- ggplot(dtanew, 
             aes(CIGPDAY,DIABP)) +
  stat_smooth(method="lm", 
              formula=y ~ x) +
  geom_point(shape=20) +
  facet_grid(BMIff ~ .) +
  labs(x="CIGPDAY", y="DIABP") +
  theme_bw()+
  theme(text=element_text(size=12))

grid.arrange(p1, p2, ncol=2)

  #Conclusion
  ####冠心病與血壓、抽菸次數、BMI、血糖、教育程度具有高相關
  ####
  ####