ggplot2入門 (2章 qplot()関数を使ってみる)

担当:とよかわ わたる

準備とデータセット

library(ggplot2)
summary(diamonds)
##      carat              cut        color        clarity     
##  Min.   :0.200   Fair     : 1610   D: 6775   SI1    :13065  
##  1st Qu.:0.400   Good     : 4906   E: 9797   VS2    :12258  
##  Median :0.700   Very Good:12082   F: 9542   SI2    : 9194  
##  Mean   :0.798   Premium  :13791   G:11292   VS1    : 8171  
##  3rd Qu.:1.040   Ideal    :21551   H: 8304   VVS2   : 5066  
##  Max.   :5.010                     I: 5422   VVS1   : 3655  
##                                    J: 2808   (Other): 2531  
##      depth          table          price             x        
##  Min.   :43.0   Min.   :43.0   Min.   :  326   Min.   : 0.00  
##  1st Qu.:61.0   1st Qu.:56.0   1st Qu.:  950   1st Qu.: 4.71  
##  Median :61.8   Median :57.0   Median : 2401   Median : 5.70  
##  Mean   :61.7   Mean   :57.5   Mean   : 3933   Mean   : 5.73  
##  3rd Qu.:62.5   3rd Qu.:59.0   3rd Qu.: 5324   3rd Qu.: 6.54  
##  Max.   :79.0   Max.   :95.0   Max.   :18823   Max.   :10.74  
##                                                               
##        y               z        
##  Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 4.72   1st Qu.: 2.91  
##  Median : 5.71   Median : 3.53  
##  Mean   : 5.73   Mean   : 3.54  
##  3rd Qu.: 6.54   3rd Qu.: 4.04  
##  Max.   :58.90   Max.   :31.80  
##                                 
# Fix the RNG seed so the random subset drawn below is the same on every run.
set.seed(1410)
# dsmall: 100 rows of diamonds; sample() draws the row indices at random.
dsmall <- diamonds[sample(nrow(diamonds), 100), ]
# Print per-column summaries of the subset.
summary(dsmall)
##      carat              cut     color     clarity       depth     
##  Min.   :0.230   Fair     : 3   D:16   VS2    :22   Min.   :56.9  
##  1st Qu.:0.417   Good     : 4   E:19   VS1    :20   1st Qu.:61.2  
##  Median :0.740   Very Good:28   F:15   SI2    :19   Median :61.8  
##  Mean   :0.865   Premium  :20   G:18   SI1    :19   Mean   :61.8  
##  3rd Qu.:1.202   Ideal    :45   H:17   VVS2   : 9   3rd Qu.:62.5  
##  Max.   :2.500                  I: 8   VVS1   : 8   Max.   :66.0  
##                                 J: 7   (Other): 3                 
##      table          price             x              y       
##  Min.   :54.0   Min.   :  353   Min.   :3.95   Min.   :3.98  
##  1st Qu.:56.0   1st Qu.: 1001   1st Qu.:4.79   1st Qu.:4.78  
##  Median :57.0   Median : 3022   Median :5.82   Median :5.79  
##  Mean   :57.4   Mean   : 4533   Mean   :5.86   Mean   :5.86  
##  3rd Qu.:59.0   3rd Qu.: 6712   3rd Qu.:6.71   3rd Qu.:6.76  
##  Max.   :65.0   Max.   :17841   Max.   :8.56   Max.   :8.48  
##                                                              
##        z       
##  Min.   :2.44  
##  1st Qu.:2.95  
##  Median :3.60  
##  Mean   :3.62  
##  3rd Qu.:4.24  
##  Max.   :5.46  
##                

2.2 基本

qplot(carat, price, data = diamonds)

plot of chunk unnamed-chunk-2

qplot(log(carat), log(price), data = diamonds)

plot of chunk unnamed-chunk-2

qplot(carat, x * y * z, data = diamonds)

plot of chunk unnamed-chunk-2

2.4 カラー、サイズ、形状などの審美的属性

qplot(carat, price, data = dsmall, colour = color)

plot of chunk unnamed-chunk-3

qplot(carat, price, data = dsmall, shape = cut)

plot of chunk unnamed-chunk-3

透明度

qplot(carat, price, data = diamonds, alpha = I(1/10))

plot of chunk unnamed-chunk-4

qplot(carat, price, data = diamonds, alpha = I(1/100))

plot of chunk unnamed-chunk-4

2.5.1 プロットに平滑化線を追加する

qplot(carat, price, data = dsmall, geom = c("point", "smooth"))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-5

qplot(carat, price, data = dsmall, geom = c("point", "smooth"), span = 0.2)
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-5

qplot(carat, price, data = dsmall, geom = c("point", "smooth"), span = 1)
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-5

一般化加法モデルによるあてはめ

library(mgcv)
## This is mgcv 1.7-13. For overview type 'help("mgcv-package")'.
qplot(carat, price, data = dsmall, geom = c("point", "smooth"), method = "gam", 
    formula = y ~ s(x))

plot of chunk unnamed-chunk-6

qplot(carat, price, data = dsmall, geom = c("point", "smooth"), method = "gam", 
    formula = y ~ s(x, bs = "cs"))

plot of chunk unnamed-chunk-6

線形モデル

library(splines)
qplot(carat, price, data = dsmall, geom = c("point", "smooth"), method = "lm")

plot of chunk unnamed-chunk-7

qplot(carat, price, data = dsmall, geom = c("point", "smooth"), method = "lm", 
    formula = y ~ ns(x, 5))

plot of chunk unnamed-chunk-7

library(MASS)
qplot(carat, price, data = dsmall, geom = c("point", "smooth"), method = "rlm", 
    formula = y ~ ns(x, 5))

plot of chunk unnamed-chunk-7

2.5.2 箱ひげ図とジッター点

qplot(color, price/carat, data = diamonds, geom = "jitter", alpha = I(1/100))

plot of chunk unnamed-chunk-8

qplot(color, price/carat, data = diamonds, geom = "boxplot")

plot of chunk unnamed-chunk-8

2.5.3 ヒストグラムと密度プロット

qplot(carat, data = diamonds, geom = "histogram", binwidth = 0.1, 
    xlim = c(0, 3))

plot of chunk unnamed-chunk-9

qplot(carat, data = diamonds, geom = "histogram", binwidth = 0.01, 
    xlim = c(0, 3))

plot of chunk unnamed-chunk-9

qplot(carat, data = diamonds, geom = "density")

plot of chunk unnamed-chunk-9

部分集合ごとの分布を比較

qplot(carat, data = diamonds, geom = "histogram", binwidth = 0.1, 
    colour = color)

plot of chunk unnamed-chunk-10

qplot(carat, data = diamonds, geom = "density", fill = color)

plot of chunk unnamed-chunk-10

2.5.4 バー・プロット

qplot(color, data = diamonds, geom = "bar")

plot of chunk unnamed-chunk-11

qplot(color, data = diamonds, geom = "bar", weight = carat) + scale_y_continuous("carat")

plot of chunk unnamed-chunk-11

2.5.5 時系列データのライン・プロットとパス・プロット

qplot(date, unemploy/pop, data = economics, geom = "line")

plot of chunk unnamed-chunk-12

qplot(date, uempmed, data = economics, geom = "line")

plot of chunk unnamed-chunk-12


# Return the calendar year of a date/time value `x` as a number.
# The `year` field of a POSIXlt object counts years since 1900, hence the
# 1900 offset.
year <- function(x) {
    broken_down <- as.POSIXlt(x)
    broken_down$year + 1900
}
# Path plot of unemploy/pop against uempmed from `economics`, with the line
# coloured by year(date) (year() is the helper defined just above).
# scale_area() is no longer available in current ggplot2 releases;
# scale_size_area() is its documented replacement.
qplot(unemploy/pop, uempmed, data = economics, geom = "path", colour = year(date)) +
    scale_size_area()

plot of chunk unnamed-chunk-12

2.6 ファセット

qplot(carat, data = diamonds, facets = color ~ ., geom = "histogram", 
    binwidth = 0.1, xlim = c(0, 3))

plot of chunk unnamed-chunk-13

# Same faceted histogram as the previous call (facets = color ~ ., binwidth 0.1,
# xlim 0-3), but with ..density.. passed as the second argument after carat.
qplot(carat, ..density.., data = diamonds, facets = color ~ ., geom = "histogram", 
    binwidth = 0.1, xlim = c(0, 3))

plot of chunk unnamed-chunk-13

2.7 その他のオプション

qplot(carat, price/carat, data = dsmall, ylab = expression(frac(price, 
    carat)), xlab = "Weight (carat)", main = "small diamonds", xlim = c(0.1, 
    1))
## Warning message: Removed 35 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-14