source('https://raw.githubusercontent.com/ywchiu/cdc_course/master/script/multiplot.R')


library(readr)
## Warning: package 'readr' was built under R version 3.4.4
measles <-read_csv("https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/measles.csv")
## Parsed with column specification:
## cols(
##   確定病名 = col_character(),
##   發病年份 = col_integer(),
##   發病月份 = col_integer(),
##   縣市 = col_character(),
##   鄉鎮 = col_character(),
##   性別 = col_character(),
##   是否為境外移入 = col_character(),
##   年齡層 = col_character(),
##   確定病例數 = col_integer()
## )
head(measles)
## # A tibble: 6 x 9
##   確定病名 發病年份 發病月份 縣市   鄉鎮   性別  是否為境外移入 年齡層
##   <chr>       <int>    <int> <chr>  <chr>  <chr> <chr>          <chr> 
## 1 麻疹         2009        4 高雄市 小港區 M     否             20-24 
## 2 麻疹         2009        5 基隆市 中山區 M     否             30-34 
## 3 麻疹         2011        6 新北市 蘆洲區 F     是             25-29 
## 4 麻疹         2014        1 高雄市 三民區 F     是             0     
## 5 麻疹         2017        3 台北市 松山區 F     是             0     
## 6 麻疹         2018        4 桃園市 蘆竹區 F     否             30-34 
## # ... with 1 more variable: 確定病例數 <int>
measles$發病時間<-as.Date(with(measles, paste(發病年份, 發病月份, '01',sep="-")), "%Y-%m-%d")

head(measles)
## # A tibble: 6 x 10
##   確定病名 發病年份 發病月份 縣市   鄉鎮   性別  是否為境外移入 年齡層
##   <chr>       <int>    <int> <chr>  <chr>  <chr> <chr>          <chr> 
## 1 麻疹         2009        4 高雄市 小港區 M     否             20-24 
## 2 麻疹         2009        5 基隆市 中山區 M     否             30-34 
## 3 麻疹         2011        6 新北市 蘆洲區 F     是             25-29 
## 4 麻疹         2014        1 高雄市 三民區 F     是             0     
## 5 麻疹         2017        3 台北市 松山區 F     是             0     
## 6 麻疹         2018        4 桃園市 蘆竹區 F     否             30-34 
## # ... with 2 more variables: 確定病例數 <int>, 發病時間 <date>
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
p1 <-ggplot(measles, aes(x =發病時間, y =確定病例數))
p1

p2 <- p1 +geom_point(aes(color=factor(性別)))
p3 <- p1 +geom_point(aes(shape=factor(性別)))


multiplot(p2, p3, cols = 2)
## Loading required package: grid

p1 +
  geom_point()+
  xlab("時間")+
  ylab("病例數")+
  ggtitle("麻疹發病趨勢")

p1 +geom_point(aes(color=factor(性別)))+geom_line(aes(color=factor(性別)))

p1 +geom_line(aes(color=factor(性別))) +geom_point(aes(color=factor(性別)))

Geometries

load("C:/Users/nc20/Downloads/cdc.Rdata")
class(cdc)
## [1] "data.frame"
str(cdc)
## 'data.frame':    20000 obs. of  9 variables:
##  $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
##  $ exerany : num  0 0 1 1 0 1 1 0 0 1 ...
##  $ hlthplan: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ smoke100: num  0 1 1 0 0 0 0 0 1 0 ...
##  $ height  : num  70 64 60 66 61 64 71 67 65 70 ...
##  $ weight  : int  175 125 105 132 150 114 194 170 150 180 ...
##  $ wtdesire: int  175 115 105 124 130 114 185 160 130 170 ...
##  $ age     : int  77 33 49 42 55 55 31 45 27 44 ...
##  $ gender  : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
summary(cdc)
##       genhlth        exerany          hlthplan         smoke100     
##  excellent:4657   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  very good:6972   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000  
##  good     :5675   Median :1.0000   Median :1.0000   Median :0.0000  
##  fair     :2019   Mean   :0.7457   Mean   :0.8738   Mean   :0.4721  
##  poor     : 677   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##                   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      height          weight         wtdesire          age        gender   
##  Min.   :48.00   Min.   : 68.0   Min.   : 68.0   Min.   :18.00   m: 9569  
##  1st Qu.:64.00   1st Qu.:140.0   1st Qu.:130.0   1st Qu.:31.00   f:10431  
##  Median :67.00   Median :165.0   Median :150.0   Median :43.00            
##  Mean   :67.18   Mean   :169.7   Mean   :155.1   Mean   :45.07            
##  3rd Qu.:70.00   3rd Qu.:190.0   3rd Qu.:175.0   3rd Qu.:57.00            
##  Max.   :93.00   Max.   :500.0   Max.   :680.0   Max.   :99.00
head(cdc)
##     genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1      good       0        1        0     70    175      175  77      m
## 2      good       0        1        1     64    125      115  33      f
## 3      good       1        1        1     60    105      105  49      f
## 4      good       1        1        0     66    132      124  42      f
## 5 very good       0        1        0     61    150      130  55      f
## 6 very good       1        1        0     64    114      114  55      f
# method 1
plot(cdc$weight, cdc$wtdesire, type= 'p')

# method 2
#  ~ formula: wtdesire ~ weight => wtdesire = weight * a + b
plot(wtdesire ~ weight,data = cdc, type= 'p')

# method 3
plot(wtdesire ~ weight,data = cdc, type= 'n')
points(cdc$weight, cdc$wtdesire)

## ggplot method
p1 <- ggplot(data=cdc, aes(x=weight, y =wtdesire))
p1 + geom_point()

# R histogram method 
par(mfrow = c(1,3))
hist(cdc$weight)
hist(cdc$weight, breaks = 100)
hist(cdc$weight, breaks = 300)

# GGPlot histogram method 
histogram<-ggplot(data=cdc, aes(x=weight))
#histogram
p1 <- histogram+geom_histogram(binwidth=5, color="black")
p2 <- histogram+geom_histogram(binwidth=10, color="black")
p3 <- histogram+geom_histogram(binwidth=20, color="black")
multiplot(p1, p2, p3, cols = 3)

# Density Plot
density<-ggplot(data=cdc, aes(x=weight))
density + geom_density(stat="density", alpha=I(0.2), fill='blue', aes(x = weight, y = ..count.. ))

histogram +geom_histogram(binwidth=5, color="black") 

# Boxplot: R Method
boxplot(cdc$weight)
boxplot(cdc$weight ~ cdc$gender)

# Boxplot: GGPlot2 Method
box<-ggplot(data=cdc, aes(x=gender, y=weight))
b <- box+geom_boxplot(aes(fill=gender ))

histogram <- ggplot(data=cdc, aes(x=weight))
h <- histogram+geom_histogram(binwidth=5,aes(fill=gender ))

multiplot(b, h, cols=1)



# add density to histogram
histogram<-ggplot(data=cdc, aes(x=weight))
histogram  + geom_histogram(binwidth = 10, color="black") + geom_density(stat="density", alpha=I(0.2), fill='blue', aes(x = weight, y = ..count.. * 10))
?geom_density
## starting httpd help server ... done
# Violin Plot
p <- ggplot(data = cdc, aes(x = factor(gender), y = weight))
p+geom_violin(aes(fill = factor(gender) ))



## R Barplot 
#str(cdc)
table(cdc$genhlth)
## 
## excellent very good      good      fair      poor 
##      4657      6972      5675      2019       677
barplot(table(cdc$genhlth))

## GGPlot2 Barplot
bar <-ggplot(data=cdc, aes(x=genhlth))
# y = ax + b
# y = slope * x + intercept
bar +geom_bar() + geom_abline(aes(intercept = 4000,slope = 0, color='red'),linetype = 'dashed')

# geom_hline => yintercept
bar +geom_bar() + geom_hline(aes(yintercept = 4000, color='red'),linetype = 'dashed')

p1 <- ggplot(data=cdc, aes(x=weight, y =wtdesire))
p1 + geom_point() + geom_vline(aes(xintercept = 300), color= 'red') + geom_hline(aes(yintercept = 400), color='blue')

?geom_abline

# R Pie chart
pie(table(cdc$gender), init.angle = 90)

# ggplot2 Pie Chart

cdc_sex <- as.data.frame(table(cdc$gender))

pie<-ggplot(cdc_sex, aes(x="", y=Freq,fill=Var1 ))

p1 <- pie +geom_bar(width=1, stat ="identity")
p1 + coord_polar(theta="y", start=0) +geom_text(aes(label=Freq, y =c(15000, 5000)), size=5)

Statsitics

smooth <- ggplot(data=cdc, aes(x=weight, y=wtdesire, color=gender))+geom_point(aes(shape=gender), size=1.5)


lm_gender <- function(gender){
  fit <- lm(wtdesire ~ weight, data = cdc[cdc$gender==gender,])
  f <- paste('wtdesire', '~', 'weight * ', round(fit$coefficients[2], 2), '+',  round(fit$coefficients[1],2) )
  f
}
lm_gender('m')
## [1] "wtdesire ~ weight *  0.55 + 74.23"
lm_gender('f')
## [1] "wtdesire ~ weight *  0.41 + 71.62"
smooth + geom_smooth(method = 'lm') +  geom_text(x = 400, y = 400, label = lm_gender('m'),color = 'black') + geom_text(x = 400, y = 100, label = lm_gender('f'), color= 'black')

#smooth + geom_smooth(method = 'loess')

# geom_jitter
box<-ggplot(data=cdc, aes(x=gender, y=weight))

b1 <- box+geom_jitter()+geom_boxplot(aes(fill=gender ))
b2 <- box+geom_point()+geom_boxplot(aes(fill=gender ))
b3 <- box+geom_boxplot(aes(fill=gender )) + geom_point()

multiplot(b1, b2, b3, cols = 3)

Facets

w <-ggplot(data=cdc, aes(x=weight, y =wtdesire)) +
  geom_point(aes(color=factor(gender))) +
  geom_smooth(method ='lm')
w

w + facet_grid(gender ~ . )

w + facet_grid(. ~ gender )

w + facet_grid(gender ~ genhlth )

w + facet_grid(genhlth ~  gender)

w + xlim(200,300) +  ylim(200,300)
## Warning: Removed 18140 rows containing non-finite values (stat_smooth).
## Warning: Removed 18140 rows containing missing values (geom_point).

histogram<-ggplot(data=cdc, aes(x=weight))
histogram+geom_histogram(binwidth=10, color="black") +ylim(0,1000)
## Warning: Removed 9 rows containing missing values (geom_bar).

histogram+geom_histogram(binwidth=10, color="black") +coord_cartesian(ylim = c(0,1000))

w + facet_grid(gender ~ genhlth ) + ylim(200,300)
## Warning: Removed 17904 rows containing non-finite values (stat_smooth).
## Warning: Removed 17904 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_smooth).

w + facet_grid(gender ~ genhlth ) + coord_cartesian(ylim = c(200,300))

histogram + geom_histogram(binwidth=10,aes(fill=factor(gender))) + facet_grid(gender ~.) + ylim(0,1000)
## Warning: Removed 9 rows containing missing values (geom_bar).

histogram + geom_histogram(binwidth=10,aes(fill=factor(gender))) + facet_grid(gender ~.) + coord_cartesian(ylim =c(0,1000),xlim=c(100,200))

Theme

w <-ggplot(data=cdc, aes(x=weight, y =wtdesire))+geom_point(aes(color=factor(gender)))+geom_smooth(method ='lm')
w +xlab('體重')+ylab('理想體重')+ggtitle('體重v.s. 理想體重') + theme(axis.title.x=element_text(color ='DarkGreen', size =10),
                                                          axis.title.y=element_text(color ='Red', size =10), 
                                                          axis.text.x=element_text(size =30),
                                                          axis.text.y=element_text(size =15))

w + ggtitle('體重v.s. 理想體重') + theme(plot.title  = element_text(size = 20, hjust = 0.5,color='orange'))

w +xlab('體重')+ylab('理想體重')+ggtitle('體重v.s. 理想體重')+
  scale_color_manual(name ='性別',labels=c("MALE", "FEMALE"), values = c('red', 'blue'))+
  theme(
    legend.text=element_text(size =15),
    legend.title=element_text(size =30),
    legend.position=c(1,1),
    legend.justification=c(1,1)
    )

?theme

w + theme_dark()

w + theme_gray()

w + theme_light()

Export Graph

jpeg('a.jpg')
a <- pie(table(cdc$gender))
dev.off()
## png 
##   2
w 

ggsave('w1.jpg')
## Saving 7 x 5 in image
w2 <- w + theme_dark()
ggsave(w2, file = 'w2.jpg')
## Saving 7 x 5 in image
ggsave(w2, file = 'w3.jpg', width = 6, height = 4)