# Load packages: ggplot2 for plotting, gcookbook for the example data
library(ggplot2)
library(gcookbook)
# Dataset used in the first example
data(heightweight)
# Preview the first rows; the `##` lines below are the recorded console output
head(heightweight)
## sex ageYear ageMonth heightIn weightLb
## 1 f 11.92 143 56.3 85.0
## 2 f 12.92 155 62.3 105.0
## 3 f 12.75 153 63.3 108.0
## 4 f 13.42 161 59.0 92.0
## 5 f 15.92 191 62.5 112.5
## 6 f 14.25 171 62.5 112.0
# Inspect the column types; the `##` lines below are the recorded console output
str(heightweight)
## 'data.frame': 236 obs. of 5 variables:
## $ sex : Factor w/ 2 levels "f","m": 1 1 1 1 1 1 1 1 1 1 ...
## $ ageYear : num 11.9 12.9 12.8 13.4 15.9 ...
## $ ageMonth: int 143 155 153 161 191 171 185 142 160 140 ...
## $ heightIn: num 56.3 62.3 63.3 59 62.5 62.5 59 56.5 62 53.8 ...
## $ weightLb: num 85 105 108 92 112 ...
# Plot: violin plot of height (heightIn) by sex, with a narrow black box plot
# drawn inside each violin and a white point marking the group median.
p <- ggplot(heightweight, aes(x = sex, y = heightIn))
p +
  geom_violin() +
  # outlier.colour = NA keeps the box plot from drawing its outlier points
  geom_boxplot(width = .1, fill = "black", outlier.colour = NA) +
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0
  stat_summary(
    fun = median, geom = "point",
    fill = "white", shape = 21, size = 2.5
  )
# Example data: 164 numeric observations, 41 for each of the four seasons
# (spring, summer, autumn, winter, in that level order). The factor column
# also carries a `scores` attribute, which is kept as in the dput() form.
seasons <- data.frame(
  values = c(
    # spring (rows 1-41)
    204, 339, 304, 434, 334, 212, 361, 102, 298, 369,
    149, 227, 278, 199, 360, 211, 219, 209, 177, 299,
    262, 285, 237, 227, 216, 229, 317, 321, 327, 123,
    84, 321, 442, 263, 225, 290, 259, 219, 244, 325,
    257,
    # summer (rows 42-82)
    672, 762, 381, 698, 578, 576, 386, 834, 790, 815,
    736, 517, 556, 685, 781, 703, 1071, 537, 784, 753,
    790, 489, 878, 433, 742, 638, 731, 1017, 850, 804,
    612, 923, 1000, 855, 750, 921, 676, 621, 781, 703,
    1054,
    # autumn (rows 83-123)
    156, 312, 267, 152, 352, 155, 215, 184, 186, 221,
    352, 183, 307, 353, 507, 255, 159, 109, 343, 377,
    209, 260, 193, 231, 111, 167, 233, 360, 488, 347,
    208, 178, 371, 276, 263, 166, 486, 119, 153, 315,
    226,
    # winter (rows 124-164)
    158, 142, 78, 75, 156, 53, 103, 141, 94, 94,
    55, 84, 35, 82, 65, 150, 30, 201, 184, 94,
    119, 150, 70, 63, 50, 74, 160, 49, 52, 135,
    105, 129, 75, 83, 85, 84, 85, 77, 147, 100,
    46
  ),
  ind = structure(
    rep(1:4, each = 41L),
    levels = c("spring", "summer", "autumn", "winter"),
    class = "factor",
    scores = array(
      c(3, 1, 2, 4),
      dim = 4L,
      dimnames = list(c("autumn", "spring", "summer", "winter"))
    )
  )
)
# Print the full data frame
print(seasons)
## values ind
## 1 204 spring
## 2 339 spring
## 3 304 spring
## 4 434 spring
## 5 334 spring
## 6 212 spring
## 7 361 spring
## 8 102 spring
## 9 298 spring
## 10 369 spring
## 11 149 spring
## 12 227 spring
## 13 278 spring
## 14 199 spring
## 15 360 spring
## 16 211 spring
## 17 219 spring
## 18 209 spring
## 19 177 spring
## 20 299 spring
## 21 262 spring
## 22 285 spring
## 23 237 spring
## 24 227 spring
## 25 216 spring
## 26 229 spring
## 27 317 spring
## 28 321 spring
## 29 327 spring
## 30 123 spring
## 31 84 spring
## 32 321 spring
## 33 442 spring
## 34 263 spring
## 35 225 spring
## 36 290 spring
## 37 259 spring
## 38 219 spring
## 39 244 spring
## 40 325 spring
## 41 257 spring
## 42 672 summer
## 43 762 summer
## 44 381 summer
## 45 698 summer
## 46 578 summer
## 47 576 summer
## 48 386 summer
## 49 834 summer
## 50 790 summer
## 51 815 summer
## 52 736 summer
## 53 517 summer
## 54 556 summer
## 55 685 summer
## 56 781 summer
## 57 703 summer
## 58 1071 summer
## 59 537 summer
## 60 784 summer
## 61 753 summer
## 62 790 summer
## 63 489 summer
## 64 878 summer
## 65 433 summer
## 66 742 summer
## 67 638 summer
## 68 731 summer
## 69 1017 summer
## 70 850 summer
## 71 804 summer
## 72 612 summer
## 73 923 summer
## 74 1000 summer
## 75 855 summer
## 76 750 summer
## 77 921 summer
## 78 676 summer
## 79 621 summer
## 80 781 summer
## 81 703 summer
## 82 1054 summer
## 83 156 autumn
## 84 312 autumn
## 85 267 autumn
## 86 152 autumn
## 87 352 autumn
## 88 155 autumn
## 89 215 autumn
## 90 184 autumn
## 91 186 autumn
## 92 221 autumn
## 93 352 autumn
## 94 183 autumn
## 95 307 autumn
## 96 353 autumn
## 97 507 autumn
## 98 255 autumn
## 99 159 autumn
## 100 109 autumn
## 101 343 autumn
## 102 377 autumn
## 103 209 autumn
## 104 260 autumn
## 105 193 autumn
## 106 231 autumn
## 107 111 autumn
## 108 167 autumn
## 109 233 autumn
## 110 360 autumn
## 111 488 autumn
## 112 347 autumn
## 113 208 autumn
## 114 178 autumn
## 115 371 autumn
## 116 276 autumn
## 117 263 autumn
## 118 166 autumn
## 119 486 autumn
## 120 119 autumn
## 121 153 autumn
## 122 315 autumn
## 123 226 autumn
## 124 158 winter
## 125 142 winter
## 126 78 winter
## 127 75 winter
## 128 156 winter
## 129 53 winter
## 130 103 winter
## 131 141 winter
## 132 94 winter
## 133 94 winter
## 134 55 winter
## 135 84 winter
## 136 35 winter
## 137 82 winter
## 138 65 winter
## 139 150 winter
## 140 30 winter
## 141 201 winter
## 142 184 winter
## 143 94 winter
## 144 119 winter
## 145 150 winter
## 146 70 winter
## 147 63 winter
## 148 50 winter
## 149 74 winter
## 150 160 winter
## 151 49 winter
## 152 52 winter
## 153 135 winter
## 154 105 winter
## 155 129 winter
## 156 75 winter
## 157 83 winter
## 158 85 winter
## 159 84 winter
## 160 85 winter
## 161 77 winter
## 162 147 winter
## 163 100 winter
## 164 46 winter
library(beanplot)
# Base graphics: box plot of values per season, then bean plots added onto
# the same axes (add = TRUE) using the same y-range.
boxplot(seasons$values ~ seasons$ind, ylim = c(0, 1200))
beanplot(
  seasons$values ~ seasons$ind,
  ylim = c(0, 1200),
  col = c("#CAB2D6", "#33A02C", "#B2DF8A"),
  border = "#CAB2D6",
  side = "second",
  # TRUE spelled out: `T` is an ordinary variable that can be reassigned
  add = TRUE
)
# ggplot2 version: box plot, semi-transparent violin and jittered raw points
ggplot(seasons, aes(x = ind, y = values)) +
  # geom_jitter() below already draws every observation, so the box plot's
  # own outlier points are suppressed to avoid plotting them twice
  geom_boxplot(outlier.shape = NA) +
  geom_violin(fill = "lightblue", alpha = 0.5) +
  geom_jitter(position = position_jitter(width = .1))
#install.packages("devtools")
#library(devtools)
#install_github("kassambara/easyGgplot2")
library(easyGgplot2)
## Loading required package: plyr
## Loading required package: grid
# Create a numeric vector of 100 standard-normal draws.
# The seed is fixed so the recorded output below can be reproduced.
set.seed(123)
numVector <- rnorm(100)
head(numVector)
## [1] -0.56047565 -0.23017749 1.55870831 0.07050839 0.12928774 1.71506499
# Data frame used by the remaining examples (built-in ToothGrowth data)
df <- ToothGrowth
head(df)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Basic violin plots ----
# Violin plot from a single numeric vector
ggplot2.violinplot(data = numVector)

# Violin plot of `len` for each `dose`
ggplot2.violinplot(data = df, xName = "dose", yName = "len")

# Horizontal orientation
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  orientation = "horizontal"
)

# trim = FALSE leaves the violin tails untrimmed; with TRUE (the default)
# the tails are trimmed to the range of the data
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  trim = FALSE
)
# Mean point, dots and outline ----
# Add a mean point to each violin
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  addMean = TRUE,
  meanPointShape = 23,
  meanPointSize = 4,
  meanPointColor = "black",
  meanPointFill = "blue"
)

# Add centered dots
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  addDot = TRUE,
  dotSize = 1,
  dotPosition = "center"
)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Add jittered dots; `jitter` is the degree of jitter in the x direction
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  addDot = TRUE,
  dotSize = 1.7,
  dotPosition = "jitter",
  jitter = 0.2
)

# Change the outline color and line type of the violins
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  colour = "red",
  linetype = "dotted"
)
# Titles ----
# Set the main title and the axis titles
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  mainTitle = "Plot of length according\n to the dose",
  xtitle = "Dose (mg)",
  ytitle = "Length"
)

# Style the titles. Each font vector is c(size, style, color); possible
# styles are 'plain', 'italic', 'bold' and 'bold.italic'.
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xtitle = "Dose (mg)",
  ytitle = "Length",
  mainTitle = "Plot of length according\n to the dose",
  mainTitleFont = c(14, "bold.italic", "red"),
  xtitleFont = c(14, "bold", "#993333"),
  ytitleFont = c(14, "bold", "#993333")
)

# Hide the x and y axis titles
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE
)
# Axis ticks ----
# Tick label font and orientation
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE,
  xTickLabelFont = c(14, "bold", "#993333"),
  yTickLabelFont = c(14, "bold", "#993333"),
  xtickLabelRotation = 45,
  ytickLabelRotation = 45
)

# Hide the axis tick labels
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE,
  xShowTickLabel = FALSE,
  yShowTickLabel = FALSE
)

# Hide the axis ticks as well
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  xShowTitle = FALSE,
  yShowTitle = FALSE,
  xShowTickLabel = FALSE,
  yShowTickLabel = FALSE,
  hideAxisTicks = TRUE
)

# axisLine: a vector of length 3 giving the size, line type and color of
# the axis lines
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  axisLine = c(1, "solid", "darkblue")
)
# Background and colors ----
# White background (the default background color is "gray")
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "white"
)

# "lightblue" background with a "white" grid
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "lightblue",
  gridColor = "white"
)

# Change the fill color of the violins
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "white",
  fill = "#FFAAD4"
)

# Remove the grid, remove the top and right borders around the plot, and
# change the axis lines
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  backgroundColor = "white",
  fill = "#FFAAD4",
  removePanelGrid = TRUE,
  removePanelBorder = TRUE,
  axisLine = c(0.5, "solid", "black")
)
# Legend ----
# Change the order of the items in the legend
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  legendItemOrder = c("2", "1", "0.5")
)

# Remove the plot legend
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  showLegend = FALSE
)

# Axis scales ----
# Possible values for the y axis scale are "none", "log2" and "log10";
# the default is "none".
# Change the y axis limits
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  ylim = c(0, 50)
)

# Log2 scale on the y axis
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  yScale = "log2"
)
# Customized plots in a few lines of R ----
# Violin plot with centered dots, per-group colors and custom titles
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  groupColors = c("#999999", "#E69F00", "#56B4E9"),
  showLegend = FALSE,
  backgroundColor = "white",
  xtitle = "Dose (mg)",
  ytitle = "length",
  mainTitle = "Plot of length according\n to vitamin C/Orange juice dose",
  addDot = TRUE,
  dotSize = 1
)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Same plot with the grid and the top/right borders removed
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "dose",
  groupColors = c("#999999", "#E69F00", "#56B4E9"),
  showLegend = FALSE,
  backgroundColor = "white",
  xtitle = "Dose (mg)",
  ytitle = "length",
  mainTitle = "Plot of length according\n to vitamin C/Orange juice dose",
  addDot = TRUE,
  dotSize = 1,
  removePanelGrid = TRUE,
  removePanelBorder = TRUE,
  axisLine = c(0.5, "solid", "black")
)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Faceting with one variable ----
# Facet according to the supp variable
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = "supp"
)

# Change the facet direction; possible values are "vertical" and
# "horizontal" (the default is vertical)
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = "supp",
  facetingDirection = "horizontal"
)

# Faceting with two variables ----
# Facet by dose and supp: rows are dose, columns are supp
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp")
)

# Reverse the order of the two variables: rows are supp, columns are dose
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("supp", "dose")
)

# Facet scales ----
# By default all panels share the same scale (facetingScales = "fixed").
# They can be made independent by setting the scales to free, free_x or
# free_y.
# Facet with free scales
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp"),
  facetingScales = "free"
)

# Facet label appearance ----
# Change the facet text font; possible font styles are 'plain', 'italic',
# 'bold' and 'bold.italic'
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp"),
  facetingFont = c(12, "bold.italic", "red")
)

# Change the appearance of the rectangle around the facet label
ggplot2.violinplot(
  data = df, xName = "dose", yName = "len",
  groupName = "supp",
  legendPosition = "top",
  faceting = TRUE,
  facetingVarNames = c("dose", "supp"),
  facetingRect = list(
    background = "white",
    lineType = "solid",
    lineColor = "black",
    lineSize = 1.5
  )
)
usage
ggplot2.violinplot(data, xName=NULL, yName=NULL, groupName=NULL, addMean=FALSE, meanPointShape=23, meanPointSize=4, meanPointColor="black", meanPointFill="blue", addDot=FALSE, dotSize=1, dotPosition=c("center", "jitter"), jitter=0.2, groupColors=NULL, brewerPalette=NULL, ...)
男女別の身長(heightIn)の箱ひげ図と密度分布がご覧いただけたでしょうか?
内部のボックスが箱ひげ図、外部のアメーバのようなものがカーネル密度推定による分布の形(ヒストグラムを滑らかにしたようなもの)を表しています。 箱ひげ図だけでも、四分位点の状況は把握できるのですが、実際にどんな分布なのかは読み取ることができません。が、ヴァイオリンプロットなら、それがわかるのです!!
素晴らしい。統計の教科書にも必須でのせるべきだと思うのは私だけでしょうか。
geom_violin()だけで、デフォルトのプロットができますが、boxplotとstat_summaryの引数の設定によって、 かゆいところにまで手が届く仕様となっています。
ちょっとわかりづらいのですが、ボックス内の白丸は中央値(median)です。箱ひげ図を黒で塗りつぶしているため本来の中央値のラインが見えなくなるので、代わりに白丸で中央値を示しています。さらにstat_summaryで平均値の点を追加すれば、平均値と中央値の差異も一度にわかってしまうお得なプロットになります。
ぜひ、みなさんもggplot2のヴァイオリンプロットを使って、充実した可視化ライフをお過ごしください。