source('https://raw.githubusercontent.com/ywchiu/cdc_course/master/script/multiplot.R')
library(readr)
## Warning: package 'readr' was built under R version 3.4.4
measles <-read_csv("https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/measles.csv")
## Parsed with column specification:
## cols(
## 確定病名 = col_character(),
## 發病年份 = col_integer(),
## 發病月份 = col_integer(),
## 縣市 = col_character(),
## 鄉鎮 = col_character(),
## 性別 = col_character(),
## 是否為境外移入 = col_character(),
## 年齡層 = col_character(),
## 確定病例數 = col_integer()
## )
head(measles)
## # A tibble: 6 x 9
## 確定病名 發病年份 發病月份 縣市 鄉鎮 性別 是否為境外移入 年齡層
## <chr> <int> <int> <chr> <chr> <chr> <chr> <chr>
## 1 麻疹 2009 4 高雄市 小港區 M 否 20-24
## 2 麻疹 2009 5 基隆市 中山區 M 否 30-34
## 3 麻疹 2011 6 新北市 蘆洲區 F 是 25-29
## 4 麻疹 2014 1 高雄市 三民區 F 是 0
## 5 麻疹 2017 3 台北市 松山區 F 是 0
## 6 麻疹 2018 4 桃園市 蘆竹區 F 否 30-34
## # ... with 1 more variable: 確定病例數 <int>
measles$發病時間<-as.Date(with(measles, paste(發病年份, 發病月份, '01',sep="-")), "%Y-%m-%d")
head(measles)
## # A tibble: 6 x 10
## 確定病名 發病年份 發病月份 縣市 鄉鎮 性別 是否為境外移入 年齡層
## <chr> <int> <int> <chr> <chr> <chr> <chr> <chr>
## 1 麻疹 2009 4 高雄市 小港區 M 否 20-24
## 2 麻疹 2009 5 基隆市 中山區 M 否 30-34
## 3 麻疹 2011 6 新北市 蘆洲區 F 是 25-29
## 4 麻疹 2014 1 高雄市 三民區 F 是 0
## 5 麻疹 2017 3 台北市 松山區 F 是 0
## 6 麻疹 2018 4 桃園市 蘆竹區 F 否 30-34
## # ... with 2 more variables: 確定病例數 <int>, 發病時間 <date>
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
p1 <-ggplot(measles, aes(x =發病時間, y =確定病例數))
p1

p2 <- p1 +geom_point(aes(color=factor(性別)))
p3 <- p1 +geom_point(aes(shape=factor(性別)))
multiplot(p2, p3, cols = 2)
## Loading required package: grid

p1 +
geom_point()+
xlab("時間")+
ylab("病例數")+
ggtitle("麻疹發病趨勢")

p1 +geom_point(aes(color=factor(性別)))+geom_line(aes(color=factor(性別)))

p1 +geom_line(aes(color=factor(性別))) +geom_point(aes(color=factor(性別)))

Geometries
load("C:/Users/nc20/Downloads/cdc.Rdata")
class(cdc)
## [1] "data.frame"
str(cdc)
## 'data.frame': 20000 obs. of 9 variables:
## $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
## $ exerany : num 0 0 1 1 0 1 1 0 0 1 ...
## $ hlthplan: num 1 1 1 1 1 1 1 1 1 1 ...
## $ smoke100: num 0 1 1 0 0 0 0 0 1 0 ...
## $ height : num 70 64 60 66 61 64 71 67 65 70 ...
## $ weight : int 175 125 105 132 150 114 194 170 150 180 ...
## $ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
## $ age : int 77 33 49 42 55 55 31 45 27 44 ...
## $ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
summary(cdc)
## genhlth exerany hlthplan smoke100
## excellent:4657 Min. :0.0000 Min. :0.0000 Min. :0.0000
## very good:6972 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## good :5675 Median :1.0000 Median :1.0000 Median :0.0000
## fair :2019 Mean :0.7457 Mean :0.8738 Mean :0.4721
## poor : 677 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## height weight wtdesire age gender
## Min. :48.00 Min. : 68.0 Min. : 68.0 Min. :18.00 m: 9569
## 1st Qu.:64.00 1st Qu.:140.0 1st Qu.:130.0 1st Qu.:31.00 f:10431
## Median :67.00 Median :165.0 Median :150.0 Median :43.00
## Mean :67.18 Mean :169.7 Mean :155.1 Mean :45.07
## 3rd Qu.:70.00 3rd Qu.:190.0 3rd Qu.:175.0 3rd Qu.:57.00
## Max. :93.00 Max. :500.0 Max. :680.0 Max. :99.00
head(cdc)
## genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1 good 0 1 0 70 175 175 77 m
## 2 good 0 1 1 64 125 115 33 f
## 3 good 1 1 1 60 105 105 49 f
## 4 good 1 1 0 66 132 124 42 f
## 5 very good 0 1 0 61 150 130 55 f
## 6 very good 1 1 0 64 114 114 55 f
# method 1
plot(cdc$weight, cdc$wtdesire, type= 'p')

# method 2
# ~ formula: wtdesire ~ weight => wtdesire = weight * a + b
plot(wtdesire ~ weight,data = cdc, type= 'p')

# method 3
plot(wtdesire ~ weight,data = cdc, type= 'n')
points(cdc$weight, cdc$wtdesire)

## ggplot method
p1 <- ggplot(data=cdc, aes(x=weight, y =wtdesire))
p1 + geom_point()

# R histogram method
par(mfrow = c(1,3))
hist(cdc$weight)
hist(cdc$weight, breaks = 100)
hist(cdc$weight, breaks = 300)

# GGPlot histogram method
histogram<-ggplot(data=cdc, aes(x=weight))
#histogram
p1 <- histogram+geom_histogram(binwidth=5, color="black")
p2 <- histogram+geom_histogram(binwidth=10, color="black")
p3 <- histogram+geom_histogram(binwidth=20, color="black")
multiplot(p1, p2, p3, cols = 3)

# Density Plot
density<-ggplot(data=cdc, aes(x=weight))
density + geom_density(stat="density", alpha=I(0.2), fill='blue', aes(x = weight, y = ..count.. ))

histogram +geom_histogram(binwidth=5, color="black")

# Boxplot: R Method
boxplot(cdc$weight)
boxplot(cdc$weight ~ cdc$gender)
# Boxplot: GGPlot2 Method
box<-ggplot(data=cdc, aes(x=gender, y=weight))
b <- box+geom_boxplot(aes(fill=gender ))
histogram <- ggplot(data=cdc, aes(x=weight))
h <- histogram+geom_histogram(binwidth=5,aes(fill=gender ))
multiplot(b, h, cols=1)
# add density to histogram
histogram<-ggplot(data=cdc, aes(x=weight))
histogram + geom_histogram(binwidth = 10, color="black") + geom_density(stat="density", alpha=I(0.2), fill='blue', aes(x = weight, y = ..count.. * 10))
?geom_density
## starting httpd help server ... done
# Violin Plot
p <- ggplot(data = cdc, aes(x = factor(gender), y = weight))
p+geom_violin(aes(fill = factor(gender) ))
## R Barplot
#str(cdc)
table(cdc$genhlth)
##
## excellent very good good fair poor
## 4657 6972 5675 2019 677
barplot(table(cdc$genhlth))

## GGPlot2 Barplot
bar <-ggplot(data=cdc, aes(x=genhlth))
# y = ax + b
# y = slope * x + intercept
bar +geom_bar() + geom_abline(aes(intercept = 4000,slope = 0, color='red'),linetype = 'dashed')

# geom_hline => yintercept
bar +geom_bar() + geom_hline(aes(yintercept = 4000, color='red'),linetype = 'dashed')

p1 <- ggplot(data=cdc, aes(x=weight, y =wtdesire))
p1 + geom_point() + geom_vline(aes(xintercept = 300), color= 'red') + geom_hline(aes(yintercept = 400), color='blue')

?geom_abline
# R Pie chart
pie(table(cdc$gender), init.angle = 90)
# ggplot2 Pie Chart
cdc_sex <- as.data.frame(table(cdc$gender))
pie<-ggplot(cdc_sex, aes(x="", y=Freq,fill=Var1 ))
p1 <- pie +geom_bar(width=1, stat ="identity")
p1 + coord_polar(theta="y", start=0) +geom_text(aes(label=Freq, y =c(15000, 5000)), size=5)

Statsitics
smooth <- ggplot(data=cdc, aes(x=weight, y=wtdesire, color=gender))+geom_point(aes(shape=gender), size=1.5)
lm_gender <- function(gender){
fit <- lm(wtdesire ~ weight, data = cdc[cdc$gender==gender,])
f <- paste('wtdesire', '~', 'weight * ', round(fit$coefficients[2], 2), '+', round(fit$coefficients[1],2) )
f
}
lm_gender('m')
## [1] "wtdesire ~ weight * 0.55 + 74.23"
lm_gender('f')
## [1] "wtdesire ~ weight * 0.41 + 71.62"
smooth + geom_smooth(method = 'lm') + geom_text(x = 400, y = 400, label = lm_gender('m'),color = 'black') + geom_text(x = 400, y = 100, label = lm_gender('f'), color= 'black')

#smooth + geom_smooth(method = 'loess')
# geom_jitter
box<-ggplot(data=cdc, aes(x=gender, y=weight))
b1 <- box+geom_jitter()+geom_boxplot(aes(fill=gender ))
b2 <- box+geom_point()+geom_boxplot(aes(fill=gender ))
b3 <- box+geom_boxplot(aes(fill=gender )) + geom_point()
multiplot(b1, b2, b3, cols = 3)

Facets
w <-ggplot(data=cdc, aes(x=weight, y =wtdesire)) +
geom_point(aes(color=factor(gender))) +
geom_smooth(method ='lm')
w

w + facet_grid(gender ~ . )

w + facet_grid(. ~ gender )

w + facet_grid(gender ~ genhlth )

w + facet_grid(genhlth ~ gender)

w + xlim(200,300) + ylim(200,300)
## Warning: Removed 18140 rows containing non-finite values (stat_smooth).
## Warning: Removed 18140 rows containing missing values (geom_point).

histogram<-ggplot(data=cdc, aes(x=weight))
histogram+geom_histogram(binwidth=10, color="black") +ylim(0,1000)
## Warning: Removed 9 rows containing missing values (geom_bar).

histogram+geom_histogram(binwidth=10, color="black") +coord_cartesian(ylim = c(0,1000))

w + facet_grid(gender ~ genhlth ) + ylim(200,300)
## Warning: Removed 17904 rows containing non-finite values (stat_smooth).
## Warning: Removed 17904 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_smooth).

w + facet_grid(gender ~ genhlth ) + coord_cartesian(ylim = c(200,300))

histogram + geom_histogram(binwidth=10,aes(fill=factor(gender))) + facet_grid(gender ~.) + ylim(0,1000)
## Warning: Removed 9 rows containing missing values (geom_bar).

histogram + geom_histogram(binwidth=10,aes(fill=factor(gender))) + facet_grid(gender ~.) + coord_cartesian(ylim =c(0,1000),xlim=c(100,200))

Theme
w <-ggplot(data=cdc, aes(x=weight, y =wtdesire))+geom_point(aes(color=factor(gender)))+geom_smooth(method ='lm')
w +xlab('體重')+ylab('理想體重')+ggtitle('體重v.s. 理想體重') + theme(axis.title.x=element_text(color ='DarkGreen', size =10),
axis.title.y=element_text(color ='Red', size =10),
axis.text.x=element_text(size =30),
axis.text.y=element_text(size =15))

w + ggtitle('體重v.s. 理想體重') + theme(plot.title = element_text(size = 20, hjust = 0.5,color='orange'))

w +xlab('體重')+ylab('理想體重')+ggtitle('體重v.s. 理想體重')+
scale_color_manual(name ='性別',labels=c("MALE", "FEMALE"), values = c('red', 'blue'))+
theme(
legend.text=element_text(size =15),
legend.title=element_text(size =30),
legend.position=c(1,1),
legend.justification=c(1,1)
)

?theme
w + theme_dark()

w + theme_gray()

w + theme_light()

Export Graph
jpeg('a.jpg')
a <- pie(table(cdc$gender))
dev.off()
## png
## 2
w

ggsave('w1.jpg')
## Saving 7 x 5 in image
w2 <- w + theme_dark()
ggsave(w2, file = 'w2.jpg')
## Saving 7 x 5 in image
ggsave(w2, file = 'w3.jpg', width = 6, height = 4)