# geom_violin() ----
# Violin plot of height by sex (gcookbook::heightweight) with a narrow boxplot
# and a median marker drawn on top of each violin.

# Load packages
library(ggplot2)
library(gcookbook)
 
# Dataset used
data(heightweight)
head(heightweight)
##   sex ageYear ageMonth heightIn weightLb
## 1   f   11.92      143     56.3     85.0
## 2   f   12.92      155     62.3    105.0
## 3   f   12.75      153     63.3    108.0
## 4   f   13.42      161     59.0     92.0
## 5   f   15.92      191     62.5    112.5
## 6   f   14.25      171     62.5    112.0
str(heightweight)
## 'data.frame':    236 obs. of  5 variables:
##  $ sex     : Factor w/ 2 levels "f","m": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ageYear : num  11.9 12.9 12.8 13.4 15.9 ...
##  $ ageMonth: int  143 155 153 161 191 171 185 142 160 140 ...
##  $ heightIn: num  56.3 62.3 63.3 59 62.5 62.5 59 56.5 62 53.8 ...
##  $ weightLb: num  85 105 108 92 112 ...
# Plot: sex on the x axis, height (inches) on the y axis
p <- ggplot(heightweight, aes(x = sex, y = heightIn))
p +
  geom_violin() +
  # Narrow black box inside each violin; outlier points get no colour
  geom_boxplot(width = 0.1, fill = "black", outlier.colour = NA) +
  # White-filled point (shape 21) at the median of each group.
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(
    fun = median,
    geom = "point",
    fill = "white",
    shape = 21,
    size = 2.5
  )

# Seasonal example data: 164 numeric observations in `values`, 41 per season,
# stored in the order spring, summer, autumn, winter. `ind` is the season
# factor and carries an extra "scores" attribute (a named 1-d array).
seasons <- structure(
  list(
    values = c(
      # spring (rows 1-41)
      204, 339, 304, 434, 334, 212, 361, 102, 298, 369, 149, 227, 278, 199,
      360, 211, 219, 209, 177, 299, 262, 285, 237, 227, 216, 229, 317, 321,
      327, 123, 84, 321, 442, 263, 225, 290, 259, 219, 244, 325, 257,
      # summer (rows 42-82)
      672, 762, 381, 698, 578, 576, 386, 834, 790, 815, 736, 517, 556, 685,
      781, 703, 1071, 537, 784, 753, 790, 489, 878, 433, 742, 638, 731, 1017,
      850, 804, 612, 923, 1000, 855, 750, 921, 676, 621, 781, 703, 1054,
      # autumn (rows 83-123)
      156, 312, 267, 152, 352, 155, 215, 184, 186, 221, 352, 183, 307, 353,
      507, 255, 159, 109, 343, 377, 209, 260, 193, 231, 111, 167, 233, 360,
      488, 347, 208, 178, 371, 276, 263, 166, 486, 119, 153, 315, 226,
      # winter (rows 124-164)
      158, 142, 78, 75, 156, 53, 103, 141, 94, 94, 55, 84, 35, 82,
      65, 150, 30, 201, 184, 94, 119, 150, 70, 63, 50, 74, 160, 49,
      52, 135, 105, 129, 75, 83, 85, 84, 85, 77, 147, 100, 46
    ),
    ind = structure(
      # integer codes 1..4, each repeated for the 41 rows of its season
      rep(1:4, each = 41L),
      levels = c("spring", "summer", "autumn", "winter"),
      class = "factor",
      scores = array(
        c(3, 1, 2, 4),
        dim = 4L,
        dimnames = list(c("autumn", "spring", "summer", "winter"))
      )
    )
  ),
  names = c("values", "ind"),
  row.names = c(NA, -164L),
  class = "data.frame"
)
seasons
##     values    ind
## 1      204 spring
## 2      339 spring
## 3      304 spring
## 4      434 spring
## 5      334 spring
## 6      212 spring
## 7      361 spring
## 8      102 spring
## 9      298 spring
## 10     369 spring
## 11     149 spring
## 12     227 spring
## 13     278 spring
## 14     199 spring
## 15     360 spring
## 16     211 spring
## 17     219 spring
## 18     209 spring
## 19     177 spring
## 20     299 spring
## 21     262 spring
## 22     285 spring
## 23     237 spring
## 24     227 spring
## 25     216 spring
## 26     229 spring
## 27     317 spring
## 28     321 spring
## 29     327 spring
## 30     123 spring
## 31      84 spring
## 32     321 spring
## 33     442 spring
## 34     263 spring
## 35     225 spring
## 36     290 spring
## 37     259 spring
## 38     219 spring
## 39     244 spring
## 40     325 spring
## 41     257 spring
## 42     672 summer
## 43     762 summer
## 44     381 summer
## 45     698 summer
## 46     578 summer
## 47     576 summer
## 48     386 summer
## 49     834 summer
## 50     790 summer
## 51     815 summer
## 52     736 summer
## 53     517 summer
## 54     556 summer
## 55     685 summer
## 56     781 summer
## 57     703 summer
## 58    1071 summer
## 59     537 summer
## 60     784 summer
## 61     753 summer
## 62     790 summer
## 63     489 summer
## 64     878 summer
## 65     433 summer
## 66     742 summer
## 67     638 summer
## 68     731 summer
## 69    1017 summer
## 70     850 summer
## 71     804 summer
## 72     612 summer
## 73     923 summer
## 74    1000 summer
## 75     855 summer
## 76     750 summer
## 77     921 summer
## 78     676 summer
## 79     621 summer
## 80     781 summer
## 81     703 summer
## 82    1054 summer
## 83     156 autumn
## 84     312 autumn
## 85     267 autumn
## 86     152 autumn
## 87     352 autumn
## 88     155 autumn
## 89     215 autumn
## 90     184 autumn
## 91     186 autumn
## 92     221 autumn
## 93     352 autumn
## 94     183 autumn
## 95     307 autumn
## 96     353 autumn
## 97     507 autumn
## 98     255 autumn
## 99     159 autumn
## 100    109 autumn
## 101    343 autumn
## 102    377 autumn
## 103    209 autumn
## 104    260 autumn
## 105    193 autumn
## 106    231 autumn
## 107    111 autumn
## 108    167 autumn
## 109    233 autumn
## 110    360 autumn
## 111    488 autumn
## 112    347 autumn
## 113    208 autumn
## 114    178 autumn
## 115    371 autumn
## 116    276 autumn
## 117    263 autumn
## 118    166 autumn
## 119    486 autumn
## 120    119 autumn
## 121    153 autumn
## 122    315 autumn
## 123    226 autumn
## 124    158 winter
## 125    142 winter
## 126     78 winter
## 127     75 winter
## 128    156 winter
## 129     53 winter
## 130    103 winter
## 131    141 winter
## 132     94 winter
## 133     94 winter
## 134     55 winter
## 135     84 winter
## 136     35 winter
## 137     82 winter
## 138     65 winter
## 139    150 winter
## 140     30 winter
## 141    201 winter
## 142    184 winter
## 143     94 winter
## 144    119 winter
## 145    150 winter
## 146     70 winter
## 147     63 winter
## 148     50 winter
## 149     74 winter
## 150    160 winter
## 151     49 winter
## 152     52 winter
## 153    135 winter
## 154    105 winter
## 155    129 winter
## 156     75 winter
## 157     83 winter
## 158     85 winter
## 159     84 winter
## 160     85 winter
## 161     77 winter
## 162    147 winter
## 163    100 winter
## 164     46 winter
library(beanplot)
# Base-graphics boxplot of the values for each season on a fixed 0-1200 y range
boxplot(seasons$values ~ seasons$ind, ylim = c(0, 1200))
# Overlay beans on the existing boxplot (add = TRUE) using the same y range.
# side = "second" draws only one half of each bean, so the boxes stay visible.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
beanplot(
  seasons$values ~ seasons$ind,
  ylim = c(0, 1200),
  col = c("#CAB2D6", "#33A02C", "#B2DF8A"),
  border = "#CAB2D6",
  side = "second",
  add = TRUE
)

# Per-season boxplot with a translucent light-blue violin and the raw
# observations jittered horizontally on top.
ggplot(data = seasons, mapping = aes(x = ind, y = values)) +
  geom_boxplot() +
  geom_violin(fill = "lightblue", alpha = 0.5) +
  geom_jitter(position = position_jitter(width = 0.1))

#install.packages("devtools")
#library(devtools)
#install_github("kassambara/easyGgplot2")

library(easyGgplot2)
## Loading required package: plyr
## Loading required package: grid
# Example input 1: a numeric vector of 100 standard-normal draws.
# No seed is set, so the values (and the output shown below) differ per run.
numVector <- rnorm(n = 100)
head(numVector)
## [1] -0.1264709 -0.5723197 -0.9387053 -0.2672664  1.6596299 -0.6150894
# Example input 2: the built-in ToothGrowth data frame (len, supp, dose)
df <- ToothGrowth
head(df)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
# Basic violin plots ----
# Violin plot from a single numeric vector
ggplot2.violinplot(data = numVector)

# Basic violin plot of the column "len" for each value of "dose"
ggplot2.violinplot(data = df, xName = "dose", yName = "len")

# Change the orientation: horizontal violin plot
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  orientation = "horizontal"
)

# trim = TRUE (the default) trims the tails of the violins to the range of
# the data; trim = FALSE leaves the tails untrimmed.
ggplot2.violinplot(data = df, xName = "dose", yName = "len", trim = FALSE)

# Violin plot with a mean point
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  addMean = TRUE,
  meanPointShape = 23,
  meanPointSize = 4,
  meanPointColor = "black",
  meanPointFill = "blue"
)

# Violin plot with centered dots
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  addDot = TRUE,
  dotSize = 1,
  dotPosition = "center"
)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Violin plot with jittered dots; `jitter` is the degree of jitter in the
# x direction
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  addDot = TRUE,
  dotSize = 1.7,
  dotPosition = "jitter",
  jitter = 0.2
)

# Change the violin outline colour and line type
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  colour = "red",
  linetype = "dotted"
)

# Titles ----
# Change the main title and the axis titles
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  mainTitle = "Plot of length according\n to the dose",
  xtitle = "Dose (mg)",
  ytitle = "Length"
)

# Customize title styles. Each font vector is c(size, style, colour);
# possible font styles: "plain", "italic", "bold", "bold.italic".
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xtitle = "Dose (mg)",
  ytitle = "Length",
  mainTitle = "Plot of length according\n to the dose",
  mainTitleFont = c(14, "bold.italic", "red"),
  xtitleFont = c(14, "bold", "#993333"),
  ytitleFont = c(14, "bold", "#993333")
)

# Hide the x and y axis titles
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE
)

# Axis ticks ----
# Axis tick label font and orientation
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE,
  xTickLabelFont = c(14, "bold", "#993333"),
  yTickLabelFont = c(14, "bold", "#993333"),
  xtickLabelRotation = 45,
  ytickLabelRotation = 45
)

# Hide axis tick labels
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE,
  xShowTickLabel = FALSE,
  yShowTickLabel = FALSE
)

# Hide axis ticks as well
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE,
  xShowTickLabel = FALSE,
  yShowTickLabel = FALSE,
  hideAxisTicks = TRUE
)

# axisLine: a vector of length 3 giving the size, the line type and the
# colour of the axis lines
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  axisLine = c(1, "solid", "darkblue")
)

# Background and colors ----
# Change the plot background and fill colors.
# Set the background color to "white" (the default background is "gray").
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "white"
)

# Set the background color to "lightblue" and the grid color to "white"
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "lightblue",
  gridColor = "white"
)

# Change the violin fill color
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "white",
  fill = "#FFAAD4"
)

# Remove the grid, remove the top and right borders around the plot, and
# change the axis lines
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "white",
  fill = "#FFAAD4",
  removePanelGrid = TRUE,
  removePanelBorder = TRUE,
  axisLine = c(0.5, "solid", "black")
)

# Legend ----
# Change the order of the items in the legend
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  legendItemOrder = c("2", "1", "0.5")
)

# Remove the plot legend
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  showLegend = FALSE
)

# Axis scales ----
# Possible values for the y axis scale are "none", "log2" and "log10";
# the default is "none".
# Change the y axis limits
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  ylim = c(0, 50)
)

# Log scale on the y axis: yScale = "log2"
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  yScale = "log2"
)

# Customized plots with little R code ----
# Customized violin plot with a centered dot plot
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  groupColors = c("#999999", "#E69F00", "#56B4E9"),
  showLegend = FALSE,
  backgroundColor = "white",
  xtitle = "Dose (mg)",
  ytitle = "length",
  mainTitle = "Plot of length according\n to vitamin C/Orange juice dose",
  addDot = TRUE,
  dotSize = 1
)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Same plot with the grid and the top and right panel borders removed
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  groupColors = c("#999999", "#E69F00", "#56B4E9"),
  showLegend = FALSE,
  backgroundColor = "white",
  xtitle = "Dose (mg)",
  ytitle = "length",
  mainTitle = "Plot of length according\n to vitamin C/Orange juice dose",
  addDot = TRUE,
  dotSize = 1,
  removePanelGrid = TRUE,
  removePanelBorder = TRUE,
  axisLine = c(0.5, "solid", "black")
)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Faceting with one variable ----
# Facet according to the supp variable
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = "supp"
)

# Change the facet direction. Possible values are "vertical" and
# "horizontal"; the default is "vertical".
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = "supp",
  facetingDirection = "horizontal"
)

# Faceting with two variables ----
# Facet by dose and supp: rows are dose and columns are supp
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp")
)

# Reverse the order of the two variables: rows are supp and columns are dose
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("supp", "dose")
)

# Facet scales ----
# By default all panels share the same scale (facetingScales = "fixed").
# They can be made independent by setting the scales to "free", "free_x"
# or "free_y".
# Facet with free scales
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp"),
  facetingScales = "free"
)

# Facet label appearance ----
# Change the facet text font.
# Possible values for the font style: "plain", "italic", "bold", "bold.italic".
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp"),
  facetingFont = c(12, "bold.italic", "red")
)

# Change the appearance of the rectangle around the facet labels
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp"),
  facetingRect = list(
    background = "white",
    lineType = "solid",
    lineColor = "black",
    lineSize = 1.5
  )
)

usage

ggplot2.violinplot(data, xName=NULL, yName=NULL, groupName=NULL, addMean=FALSE, meanPointShape=23, meanPointSize=4, meanPointColor="black", meanPointFill="blue", addDot=FALSE, dotSize=1, dotPosition=c("center", "jitter"), jitter=0.2, groupColors=NULL, brewerPalette=NULL, ...)

男女別の身長(heightIn)の箱ひげ図と分布(密度曲線)がご覧いただけたでしょうか?

内部のボックスが箱ひげ図、外部のアメーバのようなものがカーネル密度推定による分布(ヒストグラムを滑らかにして左右対称に描いたもの)を表しています。 箱ひげ図だけでも、四分位点の状況は把握できるのですが、実際にどんな分布なのかは読み取ることができません。が、ヴァイオリンプロットなら、それがわかるのです!!

素晴らしい。統計の教科書にも必須でのせるべきだと思うのは私だけでしょうか。

geom_violin()だけで、デフォルトのプロットができますが、boxplotとstat_summaryの引数の設定によって、 かゆいところにまで手が届く仕様となっています。

ちょっとわかりづらいのですが、ボックス内の白丸は中央値(median)です。箱ひげ図を黒く塗りつぶしているため中央値のラインが見えなくなるので、stat_summaryで中央値を白丸として重ねて描いています。分布の形と四分位点、中央値が一度にわかってしまうお得なプロットです。

ぜひ、みなさんもggplot2のヴァイオリンプロットを使って、充実した可視化ライフをお過ごしください。