2015年10月28日

Speaker

About me

  • Currently studying for an M.S. degree in Statistics at NCCU
    • data analysis
    • machine learning
    • researching applications of R packages
    • learning visualization skills
    • Familiar with R、SQL-Server、Python

Outline

  • ggplot2 introduction
  • 基本起手式
  • 各種圖形指令參數統整
    • bar
    • boxplot
    • line
    • point
    • histogram
    • density
  • bonus ?

ggplot2 in R

> library(ggplot2)
> # list all geom
> ls(pattern = '^geom_', env = as.environment('package:ggplot2'))
 [1] "geom_abline"     "geom_area"       "geom_bar"       
 [4] "geom_bin2d"      "geom_blank"      "geom_boxplot"   
 [7] "geom_contour"    "geom_crossbar"   "geom_density"   
[10] "geom_density2d"  "geom_dotplot"    "geom_errorbar"  
[13] "geom_errorbarh"  "geom_freqpoly"   "geom_hex"       
[16] "geom_histogram"  "geom_hline"      "geom_jitter"    
[19] "geom_line"       "geom_linerange"  "geom_map"       
[22] "geom_path"       "geom_point"      "geom_pointrange"
[25] "geom_polygon"    "geom_quantile"   "geom_raster"    
[28] "geom_rect"       "geom_ribbon"     "geom_rug"       
[31] "geom_segment"    "geom_smooth"     "geom_step"      
[34] "geom_text"       "geom_tile"       "geom_violin"    
[37] "geom_vline"     

基本起手式

  • ggplot(data=…, aes(x=…, y=…)) + geom_xxx(…)
    • data=畫圖所需資料
    • aes=x、y所想擺放的變數
    • geom_XXX 則代表你想畫哪一種圖
    • ex:geom_bar、geom_point……
  • ggplot僅能使用data.frame儲存資料(不可丟其他屬性的物件)

各種圖形指令參數統整-bar

geom_bar

> data %>% group_by(city) %>% summarise(count=length(year)) -> datap
> ggplot(datap,aes(x=city,y=count))+geom_bar(stat="identity")

geom_bar

  • fill
  • theme_grey(base_family="STHeiti")
> ggplot(datap,aes(x=city,y=count,fill=city))+geom_bar(stat="identity")+
+   theme_grey(base_family="STHeiti") 

  • scale_fill_brewer(palette = "Paired")
> ggplot(datap,aes(x=city,y=count,fill=city))+geom_bar(stat="identity")+
+   theme_grey(base_family="STHeiti")+scale_fill_brewer(palette = "Paired")

geom_bar

  • labs(title ="pongpong胖胖", x = "pongpong", y = "胖胖")
> ggplot(datap,aes(x=city,y=count,fill=city))+geom_bar(stat="identity")+
+   theme_grey(base_family="STHeiti")+labs(title ="pongpong胖胖", x = "pongpong", y = "胖胖")

geom_bar

  • annotate("text", x =2, y =190, label ="love", colour = "black",size=5.5)
  • annotate('rect', xmin=0.5, xmax=1.5, ymin=-Inf, ymax=Inf, fill='blue', alpha=.25)
  • geom_hline(aes(yintercept=410),colour = "blue",linetype = "dashed", size = 1)
> ggplot(datap,aes(x=city,y=count,fill=city))+geom_bar(stat="identity")+
+   theme_grey(base_family="STHeiti")+labs(title ="pongpong胖胖", x = "pongpong", y = "胖胖")+
+   annotate("text", x =2, y =190, label ="幹",family="STHeiti", colour = "black",size=5.5)+
+   geom_hline(aes(yintercept=410),colour = "blue",linetype = "dashed", size = 1)

geom_bar

> tai %>% filter(交易月!="0") %>% filter(交易年!="103")%>% group_by(交易年,交易月) %>% 
+   summarise(total=length(鄉鎮市區)) -> ta
> ta$交易年 = as.factor(ta$交易年)
> ta$交易月 = as.factor(ta$交易月)
> ggplot(ta,aes(x=交易年,y=total,fill=交易月))+geom_bar(stat="identity",position="dodge")+
+   theme_grey(base_family="STHeiti")+labs(title ="pongpong胖胖", x = "pongpong", y = "胖胖")+
+   annotate("text", x =2, y =190, label ="幹",family="STHeiti", colour = "black",size=5.5)+
+   scale_fill_brewer(palette ="Set3")

geom_bar

  • facet_wrap(~交易年,ncol=3)
> tai %>% filter(交易月!="0") %>% filter(交易年!="103") %>% group_by(交易年,交易月,建物型態) %>% 
+   summarise(total=length(鄉鎮市區)) -> tt
> tt$交易年 = as.factor(tt$交易年)
> tt$交易月 = as.factor(tt$交易月)
> ggplot(tt,aes(x=建物型態,y=total,fill=交易月))+geom_bar(stat="identity",position="dodge")+
+   theme_grey(base_family="STHeiti")+labs(title ="pongpong胖胖", x = "pongpong", y = "胖胖")+
+   annotate("text", x =2, y =190, label ="幹",family="STHeiti", colour = "black",size=5.5)+
+   scale_fill_brewer(palette ="Set3")+facet_wrap(~交易年,ncol=3)

各種圖形指令參數統整-boxplot

geom_boxplot

> ggplot(iris,aes(x=Species,y=Sepal.Length))+geom_boxplot()

  • geom_boxplot(outlier.colour = "red", outlier.size = 3)
> ggplot(iris,aes(x=Species,y=Sepal.Length))+ geom_boxplot(outlier.colour="red", outlier.size = 3)

geom_boxplot

  • aes(fill=Species)
> ggplot(iris,aes(x=Species,y=Sepal.Length))+
+   geom_boxplot(aes(fill=Species), colour = "black",outlier.colour="red", outlier.size = 3)

  • fill =c("blue","red","yellow")

> ggplot(iris,aes(x=Species,y=Sepal.Length))+
+   geom_boxplot(fill =c("blue","red","yellow"), colour = "#3366FF",outlier.colour="red", outlier.size = 3)

geom_boxplot

  • fill ="white"
> ggplot(iris,aes(x=Species,y=Sepal.Length))+
+   geom_boxplot(fill ="white", colour = "#3366FF",outlier.colour="red", outlier.size = 3)

  • boxplot先這樣就好 !!ㄏㄏㄏ

各種圖形指令參數統整-line

色盤控制

geom_line

  • 轉成因子後要加上group=1
> tai %>% group_by(交易月) %>% filter(交易月!="0") %>% 
+   summarise(total = length(鄉鎮市區)) -> taii
> taii$交易月 = as.factor(taii$交易月)
> ggplot(taii,aes(x=交易月, y=total,group=1)) + geom_line()

geom_line

  • group=交易年,color=交易年,linetype=交易年
  • theme_grey(base_family="STHeiti")
> tai %>% group_by(交易年,交易月) %>% filter(交易月!="0") %>% filter(交易年!="103")%>%
+   summarise(total = length(鄉鎮市區)) -> taiii
> taiii$交易年 = as.factor(taiii$交易年);taiii$交易月 = as.factor(taiii$交易月)
> ggplot(taiii,aes(x=交易月,y=total,group=交易年,color=交易年,linetype=交易年))+geom_line()+
+   theme_grey(base_family="STHeiti")

geom_line

  • scale_color_discrete(name='pongpong', labels=c('00', 'z雲'))
  • how to change color of line ???
  • geom_point(size=2,colour="black")
> cbbPalette <- c( "#D55E00", "#CC79A7")
> ggplot(taiii,aes(x=交易月,y=total,colour=交易年, group=交易年))+geom_line()+
+   theme_grey(base_family="STHeiti")+scale_color_discrete(name='pongpong', labels=c('00', 'z雲'))+scale_colour_manual(values=cbbPalette)+geom_point(size=2,colour="black")

各種圖形指令參數統整-point

geom_point

> ggplot(tai,aes(x=屋齡,y=單價.元.平方公尺.))+geom_point()

geom_point

  • geom_point(position='jitter', alpha=0.3,size=3,color="#CC79A7")
  • stat_smooth(method=lm, level=.95,color="gray33")
> ggplot(tai,aes(x=屋齡,y=單價.元.平方公尺.))+geom_point(position='jitter', alpha=0.3,size=3,color="#CC79A7")+theme_grey(base_family="STHeiti")+
+ stat_smooth(method=lm, level=.95,color="gray33")

geom_point

  • coord_cartesian(xlim = c(0,55))
> ggplot(tai,aes(x=屋齡,y=單價.元.平方公尺.))+geom_point(position='jitter', alpha=0.3,size=3,color="#CC79A7")+theme_grey(base_family="STHeiti")+
+ stat_smooth(method=lm, level=.95,color="gray33")+coord_cartesian(xlim = c(0,55))

geom_point

  • cbPalette <- c( "#0072B2", "#D55E00", "#CC79A7")
  • scale_colour_manual(values=cbPalette)
> tai$屋齡 = as.numeric(tai$屋齡)
> cbPalette <- c("#0072B2", "#D55E00", "#CC79A7")
> ggplot(tai,aes(x=屋齡,y=單價.元.平方公尺.,color=交易年))+geom_point(position='jitter', alpha=0.3,size=3)+
+   theme_grey(base_family="STHeiti")+
+   stat_smooth(method=lm, level=.95)+coord_cartesian(xlim = c(0,55))

未完待續~